ARMISelLowering.cpp (LLVM 6.0.0svn)
1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that ARM uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARMISelLowering.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMMachineFunctionInfo.h"
21 #include "ARMPerfectShuffle.h"
22 #include "ARMRegisterInfo.h"
23 #include "ARMSelectionDAGInfo.h"
24 #include "ARMSubtarget.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/BitVector.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringExtras.h"
37 #include "llvm/ADT/StringRef.h"
38 #include "llvm/ADT/StringSwitch.h"
39 #include "llvm/ADT/Triple.h"
40 #include "llvm/ADT/Twine.h"
60 #include "llvm/IR/Attributes.h"
61 #include "llvm/IR/CallingConv.h"
62 #include "llvm/IR/Constant.h"
63 #include "llvm/IR/Constants.h"
64 #include "llvm/IR/DataLayout.h"
65 #include "llvm/IR/DebugLoc.h"
66 #include "llvm/IR/DerivedTypes.h"
67 #include "llvm/IR/Function.h"
68 #include "llvm/IR/GlobalAlias.h"
69 #include "llvm/IR/GlobalValue.h"
70 #include "llvm/IR/GlobalVariable.h"
71 #include "llvm/IR/IRBuilder.h"
72 #include "llvm/IR/InlineAsm.h"
73 #include "llvm/IR/Instruction.h"
74 #include "llvm/IR/Instructions.h"
75 #include "llvm/IR/IntrinsicInst.h"
76 #include "llvm/IR/Intrinsics.h"
77 #include "llvm/IR/Module.h"
78 #include "llvm/IR/Type.h"
79 #include "llvm/IR/User.h"
80 #include "llvm/IR/Value.h"
81 #include "llvm/MC/MCInstrDesc.h"
83 #include "llvm/MC/MCRegisterInfo.h"
84 #include "llvm/MC/MCSchedule.h"
87 #include "llvm/Support/Casting.h"
88 #include "llvm/Support/CodeGen.h"
90 #include "llvm/Support/Compiler.h"
91 #include "llvm/Support/Debug.h"
93 #include "llvm/Support/KnownBits.h"
99 #include <algorithm>
100 #include <cassert>
101 #include <cstdint>
102 #include <cstdlib>
103 #include <iterator>
104 #include <limits>
105 #include <string>
106 #include <tuple>
107 #include <utility>
108 #include <vector>
109 
110 using namespace llvm;
111 
112 #define DEBUG_TYPE "arm-isel"
113 
114 STATISTIC(NumTailCalls, "Number of tail calls");
115 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
116 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
117 STATISTIC(NumConstpoolPromoted,
118  "Number of constants with their storage promoted into constant pools");
119 
120 static cl::opt<bool>
121 ARMInterworking("arm-interworking", cl::Hidden,
122  cl::desc("Enable / disable ARM interworking (for debugging only)"),
123  cl::init(true));
124 
125 static cl::opt<bool> EnableConstpoolPromotion(
126  "arm-promote-constant", cl::Hidden,
127  cl::desc("Enable / disable promotion of unnamed_addr constants into "
128  "constant pools"),
129  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
130 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
131  "arm-promote-constant-max-size", cl::Hidden,
132  cl::desc("Maximum size of constant to promote into a constant pool"),
133  cl::init(64));
134 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
135  "arm-promote-constant-max-total", cl::Hidden,
136  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
137  cl::init(128));
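// Usage sketch (an assumption about a typical standalone run, not taken from
// this file): since these are cl::opt flags, they can be toggled from the llc
// command line to experiment with the promotion heuristics, e.g.
//   llc -arm-promote-constant -arm-promote-constant-max-size=32 foo.ll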
138 
139 // The APCS parameter registers.
140 static const MCPhysReg GPRArgRegs[] = {
141  ARM::R0, ARM::R1, ARM::R2, ARM::R3
142 };
143 
144 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
145  MVT PromotedBitwiseVT) {
146  if (VT != PromotedLdStVT) {
148  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
149 
151  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
152  }
153 
154  MVT ElemTy = VT.getVectorElementType();
155  if (ElemTy != MVT::f64)
159  if (ElemTy == MVT::i32) {
164  } else {
169  }
178  if (VT.isInteger()) {
182  }
183 
184  // Promote all bit-wise operations.
185  if (VT.isInteger() && VT != PromotedBitwiseVT) {
187  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
189  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
191  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
192  }
193 
194  // Neon does not support vector divide/remainder operations.
201 
202  if (!VT.isFloatingPoint() &&
203  VT != MVT::v2i64 && VT != MVT::v1i64)
204  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
205  setOperationAction(Opcode, VT, Legal);
206 }
207 
208 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
209  addRegisterClass(VT, &ARM::DPRRegClass);
210  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
211 }
212 
213 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
214  addRegisterClass(VT, &ARM::DPairRegClass);
215  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
216 }
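// Taken together with the constructor below: 64-bit vectors (v8i8, v4i16,
// v2i32, v1i64, v2f32) are assigned to D registers via addDRTypeForNEON, and
// 128-bit vectors (v16i8, v8i16, v4i32, v2i64, v4f32, v2f64) to D-register
// pairs via addQRTypeForNEON.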
217 
218 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
219  const ARMSubtarget &STI)
220  : TargetLowering(TM), Subtarget(&STI) {
221  RegInfo = Subtarget->getRegisterInfo();
222  Itins = Subtarget->getInstrItineraryData();
223 
225 
226  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
227  !Subtarget->isTargetWatchOS()) {
228  const auto &E = Subtarget->getTargetTriple().getEnvironment();
229 
230  bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
231  E == Triple::MuslEABIHF;
232  // Windows is a special case. Technically, we will replace all of the "GNU"
233  // calls with calls to MSVCRT if appropriate and adjust the calling
234  // convention then.
235  IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
236 
237  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
238  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
239  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
240  : CallingConv::ARM_AAPCS);
241  }
242 
243  if (Subtarget->isTargetMachO()) {
244  // Uses VFP for Thumb libfuncs if available.
245  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
246  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
247  static const struct {
248  const RTLIB::Libcall Op;
249  const char * const Name;
250  const ISD::CondCode Cond;
251  } LibraryCalls[] = {
252  // Single-precision floating-point arithmetic.
253  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
254  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
255  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
256  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
257 
258  // Double-precision floating-point arithmetic.
259  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
260  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
261  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
262  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
263 
264  // Single-precision comparisons.
265  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
266  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
267  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
268  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
269  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
270  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
271  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
272  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
273 
274  // Double-precision comparisons.
275  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
276  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
277  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
278  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
279  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
280  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
281  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
282  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
283 
284  // Floating-point to integer conversions.
285  // i64 conversions are done via library routines even when generating VFP
286  // instructions, so use the same ones.
287  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
288  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
289  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
290  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
291 
292  // Conversions between floating types.
293  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
294  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
295 
296  // Integer to floating-point conversions.
297  // i64 conversions are done via library routines even when generating VFP
298  // instructions, so use the same ones.
299  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
300  // e.g., __floatunsidf vs. __floatunssidfvfp.
301  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
302  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
303  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
304  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
305  };
306 
307  for (const auto &LC : LibraryCalls) {
308  setLibcallName(LC.Op, LC.Name);
309  if (LC.Cond != ISD::SETCC_INVALID)
310  setCmpLibcallCC(LC.Op, LC.Cond);
311  }
312  }
313 
314  // Set the correct calling convention for ARMv7k WatchOS. It's just
315  // AAPCS_VFP for functions as simple as libcalls.
316  if (Subtarget->isTargetWatchABI()) {
317  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
319  }
320  }
321 
322  // These libcalls are not available in 32-bit.
323  setLibcallName(RTLIB::SHL_I128, nullptr);
324  setLibcallName(RTLIB::SRL_I128, nullptr);
325  setLibcallName(RTLIB::SRA_I128, nullptr);
326 
327  // RTLIB
328  if (Subtarget->isAAPCS_ABI() &&
329  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
330  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
331  static const struct {
332  const RTLIB::Libcall Op;
333  const char * const Name;
334  const CallingConv::ID CC;
335  const ISD::CondCode Cond;
336  } LibraryCalls[] = {
337  // Double-precision floating-point arithmetic helper functions
338  // RTABI chapter 4.1.2, Table 2
339  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
342  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
343 
344  // Double-precision floating-point comparison helper functions
345  // RTABI chapter 4.1.2, Table 3
346  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
347  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
348  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
349  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
350  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
351  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
352  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
353  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
354 
355  // Single-precision floating-point arithmetic helper functions
356  // RTABI chapter 4.1.2, Table 4
357  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
358  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
360  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
361 
362  // Single-precision floating-point comparison helper functions
363  // RTABI chapter 4.1.2, Table 5
364  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
365  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
366  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
367  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
368  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
369  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
370  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
371  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
372 
373  // Floating-point to integer conversions.
374  // RTABI chapter 4.1.2, Table 6
375  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
376  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
382  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
383 
384  // Conversions between floating types.
385  // RTABI chapter 4.1.2, Table 7
386  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389 
390  // Integer to floating-point conversions.
391  // RTABI chapter 4.1.2, Table 8
392  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
399  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
400 
401  // Long long helper functions
402  // RTABI chapter 4.2, Table 9
403  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407 
408  // Integer division functions
409  // RTABI chapter 4.3.1
410  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
417  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
418  };
419 
420  for (const auto &LC : LibraryCalls) {
421  setLibcallName(LC.Op, LC.Name);
422  setLibcallCallingConv(LC.Op, LC.CC);
423  if (LC.Cond != ISD::SETCC_INVALID)
424  setCmpLibcallCC(LC.Op, LC.Cond);
425  }
426 
427  // EABI dependent RTLIB
428  if (TM.Options.EABIVersion == EABI::EABI4 ||
429  TM.Options.EABIVersion == EABI::EABI5) {
430  static const struct {
431  const RTLIB::Libcall Op;
432  const char *const Name;
433  const CallingConv::ID CC;
434  const ISD::CondCode Cond;
435  } MemOpsLibraryCalls[] = {
436  // Memory operations
437  // RTABI chapter 4.3.4
438  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
439  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
440  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
441  };
442 
443  for (const auto &LC : MemOpsLibraryCalls) {
444  setLibcallName(LC.Op, LC.Name);
445  setLibcallCallingConv(LC.Op, LC.CC);
446  if (LC.Cond != ISD::SETCC_INVALID)
447  setCmpLibcallCC(LC.Op, LC.Cond);
448  }
449  }
450  }
451 
452  if (Subtarget->isTargetWindows()) {
453  static const struct {
454  const RTLIB::Libcall Op;
455  const char * const Name;
456  const CallingConv::ID CC;
457  } LibraryCalls[] = {
458  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
459  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
460  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
461  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
462  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
463  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
464  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
465  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
466  };
467 
468  for (const auto &LC : LibraryCalls) {
469  setLibcallName(LC.Op, LC.Name);
470  setLibcallCallingConv(LC.Op, LC.CC);
471  }
472  }
473 
474  // Use divmod compiler-rt calls for iOS 5.0 and later.
475  if (Subtarget->isTargetMachO() &&
476  !(Subtarget->isTargetIOS() &&
477  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
478  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
479  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
480  }
481 
482  // The half <-> float conversion functions are always soft-float on
483  // non-watchos platforms, but are needed for some targets which use a
484  // hard-float calling convention by default.
485  if (!Subtarget->isTargetWatchABI()) {
486  if (Subtarget->isAAPCS_ABI()) {
487  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
488  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
489  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
490  } else {
491  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
492  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
493  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
494  }
495  }
496 
497  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
498  // a __gnu_ prefix (which is the default).
499  if (Subtarget->isTargetAEABI()) {
500  static const struct {
501  const RTLIB::Libcall Op;
502  const char * const Name;
503  const CallingConv::ID CC;
504  } LibraryCalls[] = {
505  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
506  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
507  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
508  };
509 
510  for (const auto &LC : LibraryCalls) {
511  setLibcallName(LC.Op, LC.Name);
512  setLibcallCallingConv(LC.Op, LC.CC);
513  }
514  }
515 
516  if (Subtarget->isThumb1Only())
517  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
518  else
519  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
520 
521  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
522  !Subtarget->isThumb1Only()) {
523  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
524  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
525  }
526 
527  for (MVT VT : MVT::vector_valuetypes()) {
528  for (MVT InnerVT : MVT::vector_valuetypes()) {
529  setTruncStoreAction(VT, InnerVT, Expand);
530  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
531  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
532  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
533  }
534 
539 
541  }
542 
545 
548 
549  if (Subtarget->hasNEON()) {
550  addDRTypeForNEON(MVT::v2f32);
551  addDRTypeForNEON(MVT::v8i8);
552  addDRTypeForNEON(MVT::v4i16);
553  addDRTypeForNEON(MVT::v2i32);
554  addDRTypeForNEON(MVT::v1i64);
555 
556  addQRTypeForNEON(MVT::v4f32);
557  addQRTypeForNEON(MVT::v2f64);
558  addQRTypeForNEON(MVT::v16i8);
559  addQRTypeForNEON(MVT::v8i16);
560  addQRTypeForNEON(MVT::v4i32);
561  addQRTypeForNEON(MVT::v2i64);
562 
563  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
564  // neither Neon nor VFP support any arithmetic operations on it.
565  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
566  // supported for v4f32.
570  // FIXME: Code duplication: FDIV and FREM are expanded always, see
571  // ARMTargetLowering::addTypeForNEON method for details.
574  // FIXME: Create unittest.
575  // In other words, find a way to handle "copysign" when it appears in the
576  // DAG with vector operands.
578  // FIXME: Code duplication: SETCC has custom operation action, see
579  // ARMTargetLowering::addTypeForNEON method for details.
581  // FIXME: Create unittest for FNEG and for FABS.
593  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
600 
615 
616  // Mark v2f32 intrinsics.
631 
632  // Neon does not support some operations on v1i64 and v2i64 types.
634  // Custom handling for some quad-vector types to detect VMULL.
638  // Custom handling for some vector types to avoid expensive expansions
643  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
644  // a destination type that is wider than the source, nor does
645  // it have a FP_TO_[SU]INT instruction with a narrower destination than
646  // source.
651 
654 
655  // NEON does not have single instruction CTPOP for vectors with element
656  // types wider than 8-bits. However, custom lowering can leverage the
657  // v8i8/v16i8 vcnt instruction.
664 
667 
668  // NEON does not have single instruction CTTZ for vectors.
673 
678 
683 
688 
689  // NEON only has FMA instructions as of VFP4.
690  if (!Subtarget->hasVFP4()) {
693  }
694 
712 
713  // It is legal to extload from v4i8 to v4i16 or v4i32.
715  MVT::v2i32}) {
716  for (MVT VT : MVT::integer_vector_valuetypes()) {
720  }
721  }
722  }
723 
724  if (Subtarget->isFPOnlySP()) {
725  // When targeting a floating-point unit with only single-precision
726  // operations, f64 is legal for the few double-precision instructions which
727  // are present. However, no double-precision operations other than moves,
728  // loads and stores are provided by the hardware.
761  }
762 
764 
765  // ARM does not have floating-point extending loads.
766  for (MVT VT : MVT::fp_valuetypes()) {
769  }
770 
771  // ... or truncating stores
775 
776  // ARM does not have an i1 sign-extending load.
777  for (MVT VT : MVT::integer_valuetypes())
779 
780  // ARM supports all 4 flavors of integer indexed load / store.
781  if (!Subtarget->isThumb1Only()) {
782  for (unsigned im = (unsigned)ISD::PRE_INC;
792  }
793  } else {
794  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
797  }
798 
803 
804  // i64 operation support.
807  if (Subtarget->isThumb1Only()) {
810  }
811  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
812  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
814 
821 
826 
827  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
829 
830  // ARM does not have ROTL.
832  for (MVT VT : MVT::vector_valuetypes()) {
835  }
838  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
840 
841  // @llvm.readcyclecounter requires the Performance Monitors extension.
842  // Default to the 0 expansion on unsupported platforms.
843  // FIXME: Technically there are older ARM CPUs that have
844  // implementation-specific ways of obtaining this information.
845  if (Subtarget->hasPerfMon())
847 
848  // Only ARMv6 has BSWAP.
849  if (!Subtarget->hasV6Ops())
851 
852  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
853  : Subtarget->hasDivideInARMMode();
854  if (!hasDivide) {
855  // These are expanded into libcalls if the cpu doesn't have HW divider.
858  }
859 
860  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
863 
866  }
867 
870 
871  // Register based DivRem for AEABI (RTABI 4.2)
872  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
873  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
874  Subtarget->isTargetWindows()) {
877  HasStandaloneRem = false;
878 
879  if (Subtarget->isTargetWindows()) {
880  const struct {
881  const RTLIB::Libcall Op;
882  const char * const Name;
883  const CallingConv::ID CC;
884  } LibraryCalls[] = {
885  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
886  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
887  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
888  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
889 
890  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
891  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
892  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
893  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
894  };
895 
896  for (const auto &LC : LibraryCalls) {
897  setLibcallName(LC.Op, LC.Name);
898  setLibcallCallingConv(LC.Op, LC.CC);
899  }
900  } else {
901  const struct {
902  const RTLIB::Libcall Op;
903  const char * const Name;
904  const CallingConv::ID CC;
905  } LibraryCalls[] = {
906  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
907  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
908  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
909  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
910 
911  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
912  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
913  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
914  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
915  };
916 
917  for (const auto &LC : LibraryCalls) {
918  setLibcallName(LC.Op, LC.Name);
919  setLibcallCallingConv(LC.Op, LC.CC);
920  }
921  }
922 
927  } else {
930  }
931 
932  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
933  for (auto &VT : {MVT::f32, MVT::f64})
935 
940 
942 
943  // Use the default implementation.
950 
951  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
953  else
955 
956  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
957  // the default expansion.
958  InsertFencesForAtomic = false;
959  if (Subtarget->hasAnyDataBarrier() &&
960  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
961  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
962  // to ldrex/strex loops already.
964  if (!Subtarget->isThumb() || !Subtarget->isMClass())
966 
967  // On v8, we have particularly efficient implementations of atomic fences
968  // if they can be combined with nearby atomic loads and stores.
969  if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
970  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
971  InsertFencesForAtomic = true;
972  }
973  } else {
974  // If there's anything we can use as a barrier, go through custom lowering
975  // for ATOMIC_FENCE.
976  // If target has DMB in thumb, Fences can be inserted.
977  if (Subtarget->hasDataBarrier())
978  InsertFencesForAtomic = true;
979 
981  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
982 
983  // Set them all for expansion, which will force libcalls.
996  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
997  // Unordered/Monotonic case.
998  if (!InsertFencesForAtomic) {
1001  }
1002  }
1003 
1005 
1006  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1007  if (!Subtarget->hasV6Ops()) {
1010  }
1012 
1013  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1014  !Subtarget->isThumb1Only()) {
1015  // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
1016  // iff the target supports VFP2.
1019  }
1020 
1021  // We want to custom lower some of our intrinsics.
1026  if (Subtarget->useSjLjEH())
1027  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1028 
1038 
1039  // Thumb-1 cannot currently select ARMISD::SUBE.
1040  if (!Subtarget->isThumb1Only())
1042 
1048 
1049  // We don't support sin/cos/fmod/copysign/pow
1058  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1059  !Subtarget->isThumb1Only()) {
1062  }
1065 
1066  if (!Subtarget->hasVFP4()) {
1069  }
1070 
1071  // Various VFP goodness
1072  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1073  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1074  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1077  }
1078 
1079  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1080  if (!Subtarget->hasFP16()) {
1083  }
1084  }
1085 
1086  // Combine sin / cos into one node or libcall if possible.
1087  if (Subtarget->hasSinCos()) {
1088  setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1089  setLibcallName(RTLIB::SINCOS_F64, "sincos");
1090  if (Subtarget->isTargetWatchABI()) {
1093  }
1094  if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1095  // For iOS, we don't want the normal expansion of a libcall to
1096  // sincos. We want to issue a libcall to __sincos_stret.
1099  }
1100  }
1101 
1102  // FP-ARMv8 implements a lot of rounding-like FP operations.
1103  if (Subtarget->hasFPARMv8()) {
1116 
1117  if (!Subtarget->isFPOnlySP()) {
1126  }
1127  }
1128 
1129  if (Subtarget->hasNEON()) {
1130  // vmin and vmax aren't available in a scalar form, so we use
1131  // a NEON instruction with an undef lane instead.
1138  }
1139 
1140  // We have target-specific dag combine patterns for the following nodes:
1141  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1148 
1149  if (Subtarget->hasV6Ops())
1151 
1153 
1154  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1155  !Subtarget->hasVFP2())
1157  else
1159 
1160  //// temporary - rewrite interface to use type
1161  MaxStoresPerMemset = 8;
1163  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1165  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1167 
1168  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1169  // are at least 4 bytes aligned.
1171 
1172  // Prefer likely predicted branches to selects on out-of-order cores.
1173  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1174 
1175  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1176 }
1177 
1178 bool ARMTargetLowering::useSoftFloat() const {
1179  return Subtarget->useSoftFloat();
1180 }
1181 
1182 // FIXME: It might make sense to define the representative register class as the
1183 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1184 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1185 // SPR's representative would be DPR_VFP2. This should work well if register
1186 // pressure tracking were modified such that a register use would increment the
1187 // pressure of the register class's representative and all of its super
1188 // classes' representatives transitively. We have not implemented this because
1189 // of the difficulty prior to coalescing of modeling operand register classes
1190 // due to the common occurrence of cross class copies and subregister insertions
1191 // and extractions.
1192 std::pair<const TargetRegisterClass *, uint8_t>
1193 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1194  MVT VT) const {
1195  const TargetRegisterClass *RRC = nullptr;
1196  uint8_t Cost = 1;
1197  switch (VT.SimpleTy) {
1198  default:
1200  // Use DPR as representative register class for all floating point
1201  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1202  // the cost is 1 for both f32 and f64.
1203  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1204  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1205  RRC = &ARM::DPRRegClass;
1206  // When NEON is used for SP, only half of the register file is available
1207  // because operations that define both SP and DP results will be constrained
1208  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1209  // coalescing by double-counting the SP regs. See the FIXME above.
1210  if (Subtarget->useNEONForSinglePrecisionFP())
1211  Cost = 2;
1212  break;
1213  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1214  case MVT::v4f32: case MVT::v2f64:
1215  RRC = &ARM::DPRRegClass;
1216  Cost = 2;
1217  break;
1218  case MVT::v4i64:
1219  RRC = &ARM::DPRRegClass;
1220  Cost = 4;
1221  break;
1222  case MVT::v8i64:
1223  RRC = &ARM::DPRRegClass;
1224  Cost = 8;
1225  break;
1226  }
1227  return std::make_pair(RRC, Cost);
1228 }
1229 
1230 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1231  switch ((ARMISD::NodeType)Opcode) {
1232  case ARMISD::FIRST_NUMBER: break;
1233  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1234  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1235  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1236  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1237  case ARMISD::CALL: return "ARMISD::CALL";
1238  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1239  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1240  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1241  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1242  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1243  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1244  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1245  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1246  case ARMISD::CMP: return "ARMISD::CMP";
1247  case ARMISD::CMN: return "ARMISD::CMN";
1248  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1249  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1250  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1251  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1252  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1253 
1254  case ARMISD::CMOV: return "ARMISD::CMOV";
1255 
1256  case ARMISD::SSAT: return "ARMISD::SSAT";
1257 
1258  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1259  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1260  case ARMISD::RRX: return "ARMISD::RRX";
1261 
1262  case ARMISD::ADDC: return "ARMISD::ADDC";
1263  case ARMISD::ADDE: return "ARMISD::ADDE";
1264  case ARMISD::SUBC: return "ARMISD::SUBC";
1265  case ARMISD::SUBE: return "ARMISD::SUBE";
1266 
1267  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1268  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1269 
1270  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1271  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1272  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1273 
1274  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1275 
1276  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1277 
1278  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1279 
1280  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1281 
1282  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1283 
1284  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1285  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1286 
1287  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1288  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1289  case ARMISD::VCGE: return "ARMISD::VCGE";
1290  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1291  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1292  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1293  case ARMISD::VCGT: return "ARMISD::VCGT";
1294  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1295  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1296  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1297  case ARMISD::VTST: return "ARMISD::VTST";
1298 
1299  case ARMISD::VSHL: return "ARMISD::VSHL";
1300  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1301  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1302  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1303  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1304  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1305  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1306  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1307  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1308  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1309  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1310  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1311  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1312  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1313  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1314  case ARMISD::VSLI: return "ARMISD::VSLI";
1315  case ARMISD::VSRI: return "ARMISD::VSRI";
1316  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1317  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1318  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1319  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1320  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1321  case ARMISD::VDUP: return "ARMISD::VDUP";
1322  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1323  case ARMISD::VEXT: return "ARMISD::VEXT";
1324  case ARMISD::VREV64: return "ARMISD::VREV64";
1325  case ARMISD::VREV32: return "ARMISD::VREV32";
1326  case ARMISD::VREV16: return "ARMISD::VREV16";
1327  case ARMISD::VZIP: return "ARMISD::VZIP";
1328  case ARMISD::VUZP: return "ARMISD::VUZP";
1329  case ARMISD::VTRN: return "ARMISD::VTRN";
1330  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1331  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1332  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1333  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1334  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1335  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1336  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1337  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1338  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1339  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1340  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1341  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1342  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1343  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1344  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1345  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1346  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1347  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1348  case ARMISD::BFI: return "ARMISD::BFI";
1349  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1350  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1351  case ARMISD::VBSL: return "ARMISD::VBSL";
1352  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1353  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1354  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1355  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1356  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1357  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1358  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1359  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1360  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1361  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1362  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1363  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1364  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1365  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1366  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1367  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1368  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1369  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1370  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1371  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1372  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1373  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1374  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1375  }
1376  return nullptr;
1377 }
1378 
1380  EVT VT) const {
1381  if (!VT.isVector())
1382  return getPointerTy(DL);
1384 }
1385 
1386 /// getRegClassFor - Return the register class that should be used for the
1387 /// specified value type.
1388 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1389  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1390  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1391  // load / store 4 to 8 consecutive D registers.
1392  if (Subtarget->hasNEON()) {
1393  if (VT == MVT::v4i64)
1394  return &ARM::QQPRRegClass;
1395  if (VT == MVT::v8i64)
1396  return &ARM::QQQQPRRegClass;
1397  }
1398  return TargetLowering::getRegClassFor(VT);
1399 }
1400 
1401 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1402 // source/dest is aligned and the copy size is large enough. We therefore want
1403 // to align such objects passed to memory intrinsics.
1404 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1405  unsigned &PrefAlign) const {
1406  if (!isa<MemIntrinsic>(CI))
1407  return false;
1408  MinSize = 8;
1409  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1410  // cycle faster than 4-byte aligned LDM.
1411  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1412  return true;
1413 }
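// For example (an illustrative scenario, not from this file): a 64-byte
// @llvm.memcpy between i8-array allocas benefits from raising the allocas'
// alignment from 4 to 8 on v6+ non-M-class cores, so the expansion can use
// 8-byte aligned LDM/STM as noted above.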
1414 
1415 // Create a fast isel object.
1416 FastISel *
1417 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1418  const TargetLibraryInfo *libInfo) const {
1419  return ARM::createFastISel(funcInfo, libInfo);
1420 }
1421 
1422 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1423  unsigned NumVals = N->getNumValues();
1424  if (!NumVals)
1425  return Sched::RegPressure;
1426 
1427  for (unsigned i = 0; i != NumVals; ++i) {
1428  EVT VT = N->getValueType(i);
1429  if (VT == MVT::Glue || VT == MVT::Other)
1430  continue;
1431  if (VT.isFloatingPoint() || VT.isVector())
1432  return Sched::ILP;
1433  }
1434 
1435  if (!N->isMachineOpcode())
1436  return Sched::RegPressure;
1437 
1438  // Loads are scheduled for latency even if the instruction itinerary
1439  // is not available.
1440  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1441  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1442 
1443  if (MCID.getNumDefs() == 0)
1444  return Sched::RegPressure;
1445  if (!Itins->isEmpty() &&
1446  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1447  return Sched::ILP;
1448 
1449  return Sched::RegPressure;
1450 }
1451 
1452 //===----------------------------------------------------------------------===//
1453 // Lowering Code
1454 //===----------------------------------------------------------------------===//
1455 
1456 static bool isSRL16(const SDValue &Op) {
1457  if (Op.getOpcode() != ISD::SRL)
1458  return false;
1459  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1460  return Const->getZExtValue() == 16;
1461  return false;
1462 }
1463 
1464 static bool isSRA16(const SDValue &Op) {
1465  if (Op.getOpcode() != ISD::SRA)
1466  return false;
1467  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1468  return Const->getZExtValue() == 16;
1469  return false;
1470 }
1471 
1472 static bool isSHL16(const SDValue &Op) {
1473  if (Op.getOpcode() != ISD::SHL)
1474  return false;
1475  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1476  return Const->getZExtValue() == 16;
1477  return false;
1478 }
1479 
1480 // Check for a signed 16-bit value. We special-case SRA because it makes it
1481 // simpler when also looking for SRAs that aren't sign-extending a
1482 // smaller value. Without the check, we'd need to take extra care with
1483 // checking order for some operations.
1484 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1485  if (isSRA16(Op))
1486  return isSHL16(Op.getOperand(0));
1487  return DAG.ComputeNumSignBits(Op) == 17;
1488 }
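// For example, (sra (shl X, 16), 16) is accepted by the SRA/SHL check above;
// otherwise any node the DAG already proves to have 17 sign bits (i.e. the
// value fits in the low 16 bits as a signed quantity) qualifies.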
1489 
1490 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1491 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1492  switch (CC) {
1493  default: llvm_unreachable("Unknown condition code!");
1494  case ISD::SETNE: return ARMCC::NE;
1495  case ISD::SETEQ: return ARMCC::EQ;
1496  case ISD::SETGT: return ARMCC::GT;
1497  case ISD::SETGE: return ARMCC::GE;
1498  case ISD::SETLT: return ARMCC::LT;
1499  case ISD::SETLE: return ARMCC::LE;
1500  case ISD::SETUGT: return ARMCC::HI;
1501  case ISD::SETUGE: return ARMCC::HS;
1502  case ISD::SETULT: return ARMCC::LO;
1503  case ISD::SETULE: return ARMCC::LS;
1504  }
1505 }
1506 
1507 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1508 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1509  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1510  CondCode2 = ARMCC::AL;
1511  InvalidOnQNaN = true;
1512  switch (CC) {
1513  default: llvm_unreachable("Unknown FP condition!");
1514  case ISD::SETEQ:
1515  case ISD::SETOEQ:
1516  CondCode = ARMCC::EQ;
1517  InvalidOnQNaN = false;
1518  break;
1519  case ISD::SETGT:
1520  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1521  case ISD::SETGE:
1522  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1523  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1524  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1525  case ISD::SETONE:
1526  CondCode = ARMCC::MI;
1527  CondCode2 = ARMCC::GT;
1528  InvalidOnQNaN = false;
1529  break;
1530  case ISD::SETO: CondCode = ARMCC::VC; break;
1531  case ISD::SETUO: CondCode = ARMCC::VS; break;
1532  case ISD::SETUEQ:
1533  CondCode = ARMCC::EQ;
1534  CondCode2 = ARMCC::VS;
1535  InvalidOnQNaN = false;
1536  break;
1537  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1538  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1539  case ISD::SETLT:
1540  case ISD::SETULT: CondCode = ARMCC::LT; break;
1541  case ISD::SETLE:
1542  case ISD::SETULE: CondCode = ARMCC::LE; break;
1543  case ISD::SETNE:
1544  case ISD::SETUNE:
1545  CondCode = ARMCC::NE;
1546  InvalidOnQNaN = false;
1547  break;
1548  }
1549 }
1550 
1551 //===----------------------------------------------------------------------===//
1552 // Calling Convention Implementation
1553 //===----------------------------------------------------------------------===//
1554 
1555 #include "ARMGenCallingConv.inc"
1556 
1557 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1558 /// account presence of floating point hardware and calling convention
1559 /// limitations, such as support for variadic functions.
1560 CallingConv::ID
1561 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1562  bool isVarArg) const {
1563  switch (CC) {
1564  default:
1565  llvm_unreachable("Unsupported calling convention");
1567  case CallingConv::ARM_APCS:
1568  case CallingConv::GHC:
1569  return CC;
1573  case CallingConv::Swift:
1575  case CallingConv::C:
1576  if (!Subtarget->isAAPCS_ABI())
1577  return CallingConv::ARM_APCS;
1578  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1580  !isVarArg)
1582  else
1583  return CallingConv::ARM_AAPCS;
1584  case CallingConv::Fast:
1586  if (!Subtarget->isAAPCS_ABI()) {
1587  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1588  return CallingConv::Fast;
1589  return CallingConv::ARM_APCS;
1590  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1592  else
1593  return CallingConv::ARM_AAPCS;
1594  }
1595 }
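// For instance (assuming the usual hard-float ABI check on the elided lines),
// CallingConv::C on an AAPCS target with VFP2, outside Thumb1 and for a
// non-variadic call, is mapped to ARM_AAPCS_VFP, while a variadic call falls
// back to ARM_AAPCS.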
1596 
1597 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1598  bool isVarArg) const {
1599  return CCAssignFnForNode(CC, false, isVarArg);
1600 }
1601 
1602 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1603  bool isVarArg) const {
1604  return CCAssignFnForNode(CC, true, isVarArg);
1605 }
1606 
1607 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1608 /// CallingConvention.
1609 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1610  bool Return,
1611  bool isVarArg) const {
1612  switch (getEffectiveCallingConv(CC, isVarArg)) {
1613  default:
1614  llvm_unreachable("Unsupported calling convention");
1615  case CallingConv::ARM_APCS:
1616  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1618  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1620  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1621  case CallingConv::Fast:
1622  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1623  case CallingConv::GHC:
1624  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1626  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1627  }
1628 }
1629 
1630 /// LowerCallResult - Lower the result values of a call into the
1631 /// appropriate copies out of appropriate physical registers.
1632 SDValue ARMTargetLowering::LowerCallResult(
1633  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1634  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1635  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1636  SDValue ThisVal) const {
1637 
1638  // Assign locations to each value returned by this call.
1639  SmallVector<CCValAssign, 16> RVLocs;
1640  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1641  *DAG.getContext());
1642  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1643 
1644  // Copy all of the result registers out of their specified physreg.
1645  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1646  CCValAssign VA = RVLocs[i];
1647 
1648  // Pass 'this' value directly from the argument to return value, to avoid
1649  // reg unit interference
1650  if (i == 0 && isThisReturn) {
1651  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1652  "unexpected return calling convention register assignment");
1653  InVals.push_back(ThisVal);
1654  continue;
1655  }
1656 
1657  SDValue Val;
1658  if (VA.needsCustom()) {
1659  // Handle f64 or half of a v2f64.
1660  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1661  InFlag);
1662  Chain = Lo.getValue(1);
1663  InFlag = Lo.getValue(2);
1664  VA = RVLocs[++i]; // skip ahead to next loc
1665  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1666  InFlag);
1667  Chain = Hi.getValue(1);
1668  InFlag = Hi.getValue(2);
1669  if (!Subtarget->isLittle())
1670  std::swap (Lo, Hi);
1671  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1672 
1673  if (VA.getLocVT() == MVT::v2f64) {
1674  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1675  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1676  DAG.getConstant(0, dl, MVT::i32));
1677 
1678  VA = RVLocs[++i]; // skip ahead to next loc
1679  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1680  Chain = Lo.getValue(1);
1681  InFlag = Lo.getValue(2);
1682  VA = RVLocs[++i]; // skip ahead to next loc
1683  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1684  Chain = Hi.getValue(1);
1685  InFlag = Hi.getValue(2);
1686  if (!Subtarget->isLittle())
1687  std::swap (Lo, Hi);
1688  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1689  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1690  DAG.getConstant(1, dl, MVT::i32));
1691  }
1692  } else {
1693  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1694  InFlag);
1695  Chain = Val.getValue(1);
1696  InFlag = Val.getValue(2);
1697  }
1698 
1699  switch (VA.getLocInfo()) {
1700  default: llvm_unreachable("Unknown loc info!");
1701  case CCValAssign::Full: break;
1702  case CCValAssign::BCvt:
1703  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1704  break;
1705  }
1706 
1707  InVals.push_back(Val);
1708  }
1709 
1710  return Chain;
1711 }
1712 
1713 /// LowerMemOpCallTo - Store the argument to the stack.
1714 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1715  SDValue Arg, const SDLoc &dl,
1716  SelectionDAG &DAG,
1717  const CCValAssign &VA,
1718  ISD::ArgFlagsTy Flags) const {
1719  unsigned LocMemOffset = VA.getLocMemOffset();
1720  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1721  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1722  StackPtr, PtrOff);
1723  return DAG.getStore(
1724  Chain, dl, Arg, PtrOff,
1725  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1726 }
1727 
1728 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1729  SDValue Chain, SDValue &Arg,
1730  RegsToPassVector &RegsToPass,
1731  CCValAssign &VA, CCValAssign &NextVA,
1732  SDValue &StackPtr,
1733  SmallVectorImpl<SDValue> &MemOpChains,
1734  ISD::ArgFlagsTy Flags) const {
1735 
1736  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1737  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1738  unsigned id = Subtarget->isLittle() ? 0 : 1;
1739  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1740 
1741  if (NextVA.isRegLoc())
1742  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1743  else {
1744  assert(NextVA.isMemLoc());
1745  if (!StackPtr.getNode())
1746  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1747  getPointerTy(DAG.getDataLayout()));
1748 
1749  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1750  dl, DAG, NextVA,
1751  Flags));
1752  }
1753 }
1754 
1755 /// LowerCall - Lower a call into a callseq_start <-
1756 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1757 /// nodes.
1758 SDValue
1759 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1760  SmallVectorImpl<SDValue> &InVals) const {
1761  SelectionDAG &DAG = CLI.DAG;
1762  SDLoc &dl = CLI.DL;
1763  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1764  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1765  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1766  SDValue Chain = CLI.Chain;
1767  SDValue Callee = CLI.Callee;
1768  bool &isTailCall = CLI.IsTailCall;
1769  CallingConv::ID CallConv = CLI.CallConv;
1770  bool doesNotRet = CLI.DoesNotReturn;
1771  bool isVarArg = CLI.IsVarArg;
1772 
1773  MachineFunction &MF = DAG.getMachineFunction();
1774  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1775  bool isThisReturn = false;
1776  bool isSibCall = false;
1777  auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1778 
1779  // Disable tail calls if they're not supported.
1780  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1781  isTailCall = false;
1782 
1783  if (isTailCall) {
1784  // Check if it's really possible to do a tail call.
1785  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1786  isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1787  Outs, OutVals, Ins, DAG);
1788  if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1789  report_fatal_error("failed to perform tail call elimination on a call "
1790  "site marked musttail");
1791  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1792  // detected sibcalls.
1793  if (isTailCall) {
1794  ++NumTailCalls;
1795  isSibCall = true;
1796  }
1797  }
1798 
1799  // Analyze operands of the call, assigning locations to each operand.
1800  SmallVector<CCValAssign, 16> ArgLocs;
1801  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1802  *DAG.getContext());
1803  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1804 
1805  // Get a count of how many bytes are to be pushed on the stack.
1806  unsigned NumBytes = CCInfo.getNextStackOffset();
1807 
1808  // For tail calls, memory operands are available in our caller's stack.
1809  if (isSibCall)
1810  NumBytes = 0;
1811 
1812  // Adjust the stack pointer for the new arguments...
1813  // These operations are automatically eliminated by the prolog/epilog pass
1814  if (!isSibCall)
1815  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1816 
1817  SDValue StackPtr =
1818  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1819 
1820  RegsToPassVector RegsToPass;
1821  SmallVector<SDValue, 8> MemOpChains;
1822 
1823  // Walk the register/memloc assignments, inserting copies/loads. In the case
1824  // of tail call optimization, arguments are handled later.
1825  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1826  i != e;
1827  ++i, ++realArgIdx) {
1828  CCValAssign &VA = ArgLocs[i];
1829  SDValue Arg = OutVals[realArgIdx];
1830  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1831  bool isByVal = Flags.isByVal();
1832 
1833  // Promote the value if needed.
1834  switch (VA.getLocInfo()) {
1835  default: llvm_unreachable("Unknown loc info!");
1836  case CCValAssign::Full: break;
1837  case CCValAssign::SExt:
1838  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1839  break;
1840  case CCValAssign::ZExt:
1841  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1842  break;
1843  case CCValAssign::AExt:
1844  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1845  break;
1846  case CCValAssign::BCvt:
1847  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1848  break;
1849  }
1850 
1851  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1852  if (VA.needsCustom()) {
1853  if (VA.getLocVT() == MVT::v2f64) {
1854  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1855  DAG.getConstant(0, dl, MVT::i32));
1856  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1857  DAG.getConstant(1, dl, MVT::i32));
1858 
1859  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1860  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1861 
1862  VA = ArgLocs[++i]; // skip ahead to next loc
1863  if (VA.isRegLoc()) {
1864  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1865  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1866  } else {
1867  assert(VA.isMemLoc());
1868 
1869  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1870  dl, DAG, VA, Flags));
1871  }
1872  } else {
1873  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1874  StackPtr, MemOpChains, Flags);
1875  }
1876  } else if (VA.isRegLoc()) {
1877  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1878  Outs[0].VT == MVT::i32) {
1879  assert(VA.getLocVT() == MVT::i32 &&
1880  "unexpected calling convention register assignment");
1881  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1882  "unexpected use of 'returned'");
1883  isThisReturn = true;
1884  }
1885  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1886  } else if (isByVal) {
1887  assert(VA.isMemLoc());
1888  unsigned offset = 0;
1889 
1890  // True if this byval aggregate will be split between registers
1891  // and memory.
1892  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1893  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1894 
1895  if (CurByValIdx < ByValArgsCount) {
1896 
1897  unsigned RegBegin, RegEnd;
1898  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1899 
1900  EVT PtrVT =
1901  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1902  unsigned int i, j;
1903  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1904  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1905  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1906  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1907  MachinePointerInfo(),
1908  DAG.InferPtrAlignment(AddArg));
1909  MemOpChains.push_back(Load.getValue(1));
1910  RegsToPass.push_back(std::make_pair(j, Load));
1911  }
1912 
1913  // If the parameter size exceeds the register area, the "offset" value
1914  // helps us calculate the stack slot for the remaining part properly.
1915  offset = RegEnd - RegBegin;
1916 
1917  CCInfo.nextInRegsParam();
1918  }
1919 
1920  if (Flags.getByValSize() > 4*offset) {
1921  auto PtrVT = getPointerTy(DAG.getDataLayout());
1922  unsigned LocMemOffset = VA.getLocMemOffset();
1923  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1924  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1925  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1926  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1927  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1928  MVT::i32);
1929  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1930  MVT::i32);
1931 
1932  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1933  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1934  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1935  Ops));
1936  }
1937  } else if (!isSibCall) {
1938  assert(VA.isMemLoc());
1939 
1940  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1941  dl, DAG, VA, Flags));
1942  }
1943  }
1944 
1945  if (!MemOpChains.empty())
1946  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1947 
1948  // Build a sequence of copy-to-reg nodes chained together with token chain
1949  // and flag operands which copy the outgoing args into the appropriate regs.
1950  SDValue InFlag;
1951  // Tail call byval lowering might overwrite argument registers so in case of
1952  // tail call optimization the copies to registers are lowered later.
1953  if (!isTailCall)
1954  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1955  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1956  RegsToPass[i].second, InFlag);
1957  InFlag = Chain.getValue(1);
1958  }
1959 
1960  // For tail calls lower the arguments to the 'real' stack slot.
1961  if (isTailCall) {
1962  // Force all the incoming stack arguments to be loaded from the stack
1963  // before any new outgoing arguments are stored to the stack, because the
1964  // outgoing stack slots may alias the incoming argument stack slots, and
1965  // the alias isn't otherwise explicit. This is slightly more conservative
1966  // than necessary, because it means that each store effectively depends
1967  // on every argument instead of just those arguments it would clobber.
1968 
1969  // Do not flag preceding copytoreg stuff together with the following stuff.
1970  InFlag = SDValue();
1971  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1972  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1973  RegsToPass[i].second, InFlag);
1974  InFlag = Chain.getValue(1);
1975  }
1976  InFlag = SDValue();
1977  }
1978 
1979  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1980  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1981  // node so that legalize doesn't hack it.
1982  bool isDirect = false;
1983 
1984  const TargetMachine &TM = getTargetMachine();
1985  const Module *Mod = MF.getFunction()->getParent();
1986  const GlobalValue *GV = nullptr;
1987  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1988  GV = G->getGlobal();
1989  bool isStub =
1990  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1991 
1992  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1993  bool isLocalARMFunc = false;
1994  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1995  auto PtrVt = getPointerTy(DAG.getDataLayout());
1996 
1997  if (Subtarget->genLongCalls()) {
1998  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
1999  "long-calls codegen is not position independent!");
2000  // Handle a global address or an external symbol. If it's not one of
2001  // those, the target's already in a register, so we don't need to do
2002  // anything extra.
2003  if (isa<GlobalAddressSDNode>(Callee)) {
2004  // Create a constant pool entry for the callee address
2005  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2006  ARMConstantPoolValue *CPV =
2007  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2008 
2009  // Get the address of the callee into a register
2010  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2011  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2012  Callee = DAG.getLoad(
2013  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2014  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2015  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2016  const char *Sym = S->getSymbol();
2017 
2018  // Create a constant pool entry for the callee address
2019  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2020  ARMConstantPoolValue *CPV =
2021  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2022  ARMPCLabelIndex, 0);
2023  // Get the address of the callee into a register
2024  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2025  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2026  Callee = DAG.getLoad(
2027  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2028  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2029  }
2030  } else if (isa<GlobalAddressSDNode>(Callee)) {
2031  // If we're optimizing for minimum size and the function is called three or
2032  // more times in this block, we can improve codesize by calling indirectly
2033  // as BLXr has a 16-bit encoding.
2034  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2035  auto *BB = CLI.CS->getParent();
2036  bool PreferIndirect =
2037  Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
2038  count_if(GV->users(), [&BB](const User *U) {
2039  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2040  }) > 2;
2041 
2042  if (!PreferIndirect) {
2043  isDirect = true;
2044  bool isDef = GV->isStrongDefinitionForLinker();
2045 
2046  // ARM call to a local ARM function is predicable.
2047  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2048  // tBX takes a register source operand.
2049  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2050  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2051  Callee = DAG.getNode(
2052  ARMISD::WrapperPIC, dl, PtrVt,
2053  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2054  Callee = DAG.getLoad(
2055  PtrVt, dl, DAG.getEntryNode(), Callee,
2056  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2057  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2058  MachineMemOperand::MOInvariant);
2059  } else if (Subtarget->isTargetCOFF()) {
2060  assert(Subtarget->isTargetWindows() &&
2061  "Windows is the only supported COFF target");
2062  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2063  ? ARMII::MO_DLLIMPORT
2064  : ARMII::MO_NO_FLAG;
2065  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2066  TargetFlags);
2067  if (GV->hasDLLImportStorageClass())
2068  Callee =
2069  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2070  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2071  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2072  } else {
2073  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2074  }
2075  }
2076  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2077  isDirect = true;
2078  // tBX takes a register source operand.
2079  const char *Sym = S->getSymbol();
2080  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2081  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2082  ARMConstantPoolValue *CPV =
2083  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2084  ARMPCLabelIndex, 4);
2085  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2086  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2087  Callee = DAG.getLoad(
2088  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2089  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2090  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2091  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2092  } else {
2093  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2094  }
2095  }
2096 
2097  // FIXME: handle tail calls differently.
2098  unsigned CallOpc;
2099  if (Subtarget->isThumb()) {
2100  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2101  CallOpc = ARMISD::CALL_NOLINK;
2102  else
2103  CallOpc = ARMISD::CALL;
2104  } else {
2105  if (!isDirect && !Subtarget->hasV5TOps())
2106  CallOpc = ARMISD::CALL_NOLINK;
2107  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2108  // Emit regular call when code size is the priority
2109  !MF.getFunction()->optForMinSize())
2110  // "mov lr, pc; b _foo" to avoid confusing the RSP
2111  CallOpc = ARMISD::CALL_NOLINK;
2112  else
2113  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2114  }
2115 
2116  std::vector<SDValue> Ops;
2117  Ops.push_back(Chain);
2118  Ops.push_back(Callee);
2119 
2120  // Add argument registers to the end of the list so that they are known live
2121  // into the call.
2122  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2123  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2124  RegsToPass[i].second.getValueType()));
2125 
2126  // Add a register mask operand representing the call-preserved registers.
2127  if (!isTailCall) {
2128  const uint32_t *Mask;
2129  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2130  if (isThisReturn) {
2131  // For 'this' returns, use the R0-preserving mask if applicable
2132  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2133  if (!Mask) {
2134  // Set isThisReturn to false if the calling convention is not one that
2135  // allows 'returned' to be modeled in this way, so LowerCallResult does
2136  // not try to pass 'this' straight through
2137  isThisReturn = false;
2138  Mask = ARI->getCallPreservedMask(MF, CallConv);
2139  }
2140  } else
2141  Mask = ARI->getCallPreservedMask(MF, CallConv);
2142 
2143  assert(Mask && "Missing call preserved mask for calling convention");
2144  Ops.push_back(DAG.getRegisterMask(Mask));
2145  }
2146 
2147  if (InFlag.getNode())
2148  Ops.push_back(InFlag);
2149 
2150  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2151  if (isTailCall) {
2152  MF.getFrameInfo().setHasTailCall();
2153  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2154  }
2155 
2156  // Returns a chain and a flag for retval copy to use.
2157  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2158  InFlag = Chain.getValue(1);
2159 
2160  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2161  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2162  if (!Ins.empty())
2163  InFlag = Chain.getValue(1);
2164 
2165  // Handle result values, copying them out of physregs into vregs that we
2166  // return.
2167  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2168  InVals, isThisReturn,
2169  isThisReturn ? OutVals[0] : SDValue());
2170 }
2171 
2172 /// HandleByVal - Every parameter *after* a byval parameter is passed
2173 /// on the stack. Remember the next parameter register to allocate,
2174  /// and then confiscate the rest of the parameter registers to ensure
2175 /// this.
2176 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2177  unsigned Align) const {
2178  // Byval (as with any stack) slots are always at least 4 byte aligned.
2179  Align = std::max(Align, 4U);
2180 
2181  unsigned Reg = State->AllocateReg(GPRArgRegs);
2182  if (!Reg)
2183  return;
2184 
2185  unsigned AlignInRegs = Align / 4;
2186  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2187  for (unsigned i = 0; i < Waste; ++i)
2188  Reg = State->AllocateReg(GPRArgRegs);
2189 
2190  if (!Reg)
2191  return;
2192 
2193  unsigned Excess = 4 * (ARM::R4 - Reg);
2194 
2195  // Special case when NSAA != SP and the parameter size is greater than the
2196  // size of all remaining GPR regs. In that case we can't split the parameter;
2197  // we must send it to the stack. We must also set NCRN to R4, so all
2198  // remaining registers are wasted.
2199  const unsigned NSAAOffset = State->getNextStackOffset();
2200  if (NSAAOffset != 0 && Size > Excess) {
2201  while (State->AllocateReg(GPRArgRegs))
2202  ;
2203  return;
2204  }
2205 
2206  // The first register for the byval parameter is the first register that
2207  // wasn't allocated before this method call, i.e. "reg".
2208  // If the parameter is small enough to be saved in the range [reg, r4), then
2209  // the end (one past the last) register would be reg + param-size-in-regs;
2210  // otherwise the parameter is split between registers and stack, and the
2211  // end register is r4 in that case.
2212  unsigned ByValRegBegin = Reg;
2213  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2214  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2215  // Note that the first register was already allocated at the beginning of
2216  // this function; allocate the remaining number of registers we need.
2217  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2218  State->AllocateReg(GPRArgRegs);
2219  // A byval parameter that is split between registers and memory needs its
2220  // size truncated here.
2221  // In the case where the entire structure fits in registers, we set the
2222  // size in memory to zero.
2223  Size = std::max<int>(Size - Excess, 0);
2224 }
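// Illustrative sketch (AAPCS assumed; hypothetical call, not from this file):
// for f(int a, struct S b) where 'b' is a 12-byte, 4-byte-aligned byval, 'a'
// takes r0, so HandleByVal records r1-r3 as the in-register portion of 'b' and
// reduces Size to 0 (no stack copy is needed). With a 20-byte byval instead,
// 'b' occupies r1-r3 plus 8 bytes of stack, and, as described above, every
// later parameter is then passed on the stack.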
2225 
2226 /// MatchingStackOffset - Return true if the given stack call argument is
2227 /// already available in the same position (relatively) of the caller's
2228 /// incoming argument stack.
2229 static
2230 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2231  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2232  const TargetInstrInfo *TII) {
2233  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2234  int FI = std::numeric_limits<int>::max();
2235  if (Arg.getOpcode() == ISD::CopyFromReg) {
2236  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2237  if (!TargetRegisterInfo::isVirtualRegister(VR))
2238  return false;
2239  MachineInstr *Def = MRI->getVRegDef(VR);
2240  if (!Def)
2241  return false;
2242  if (!Flags.isByVal()) {
2243  if (!TII->isLoadFromStackSlot(*Def, FI))
2244  return false;
2245  } else {
2246  return false;
2247  }
2248  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2249  if (Flags.isByVal())
2250  // ByVal argument is passed in as a pointer but it's now being
2251  // dereferenced. e.g.
2252  // define @foo(%struct.X* %A) {
2253  // tail call @bar(%struct.X* byval %A)
2254  // }
2255  return false;
2256  SDValue Ptr = Ld->getBasePtr();
2257  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2258  if (!FINode)
2259  return false;
2260  FI = FINode->getIndex();
2261  } else
2262  return false;
2263 
2264  assert(FI != std::numeric_limits<int>::max());
2265  if (!MFI.isFixedObjectIndex(FI))
2266  return false;
2267  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2268 }
2269 
2270 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2271 /// for tail call optimization. Targets which want to do tail call
2272 /// optimization should implement this function.
2273 bool
2274 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2275  CallingConv::ID CalleeCC,
2276  bool isVarArg,
2277  bool isCalleeStructRet,
2278  bool isCallerStructRet,
2279  const SmallVectorImpl<ISD::OutputArg> &Outs,
2280  const SmallVectorImpl<SDValue> &OutVals,
2281  const SmallVectorImpl<ISD::InputArg> &Ins,
2282  SelectionDAG& DAG) const {
2283  MachineFunction &MF = DAG.getMachineFunction();
2284  const Function *CallerF = MF.getFunction();
2285  CallingConv::ID CallerCC = CallerF->getCallingConv();
2286 
2287  assert(Subtarget->supportsTailCall());
2288 
2289  // Look for obvious safe cases to perform tail call optimization that do not
2290  // require ABI changes. This is what gcc calls sibcall.
2291 
2292  // Exception-handling functions need a special set of instructions to indicate
2293  // a return to the hardware. Tail-calling another function would probably
2294  // break this.
2295  if (CallerF->hasFnAttribute("interrupt"))
2296  return false;
2297 
2298  // Also avoid sibcall optimization if either caller or callee uses struct
2299  // return semantics.
2300  if (isCalleeStructRet || isCallerStructRet)
2301  return false;
2302 
2303  // Externally-defined functions with weak linkage should not be
2304  // tail-called on ARM when the OS does not support dynamic
2305  // pre-emption of symbols, as the AAELF spec requires normal calls
2306  // to undefined weak functions to be replaced with a NOP or jump to the
2307  // next instruction. The behaviour of branch instructions in this
2308  // situation (as used for tail calls) is implementation-defined, so we
2309  // cannot rely on the linker replacing the tail call with a return.
2310  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2311  const GlobalValue *GV = G->getGlobal();
2312  const Triple &TT = getTargetMachine().getTargetTriple();
2313  if (GV->hasExternalWeakLinkage() &&
2314  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2315  return false;
2316  }
2317 
2318  // Check that the call results are passed in the same way.
2319  LLVMContext &C = *DAG.getContext();
2320  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2321  CCAssignFnForReturn(CalleeCC, isVarArg),
2322  CCAssignFnForReturn(CallerCC, isVarArg)))
2323  return false;
2324  // The callee has to preserve all registers the caller needs to preserve.
2325  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2326  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2327  if (CalleeCC != CallerCC) {
2328  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2329  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2330  return false;
2331  }
2332 
2333  // If Caller's vararg or byval argument has been split between registers and
2334  // stack, do not perform tail call, since part of the argument is in caller's
2335  // local frame.
2336  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2337  if (AFI_Caller->getArgRegsSaveSize())
2338  return false;
2339 
2340  // If the callee takes no arguments then go on to check the results of the
2341  // call.
2342  if (!Outs.empty()) {
2343  // Check if stack adjustment is needed. For now, do not do this if any
2344  // argument is passed on the stack.
2345  SmallVector<CCValAssign, 16> ArgLocs;
2346  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2347  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2348  if (CCInfo.getNextStackOffset()) {
2349  // Check if the arguments are already laid out in the right way as
2350  // the caller's fixed stack objects.
2351  MachineFrameInfo &MFI = MF.getFrameInfo();
2352  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2353  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2354  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2355  i != e;
2356  ++i, ++realArgIdx) {
2357  CCValAssign &VA = ArgLocs[i];
2358  EVT RegVT = VA.getLocVT();
2359  SDValue Arg = OutVals[realArgIdx];
2360  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2361  if (VA.getLocInfo() == CCValAssign::Indirect)
2362  return false;
2363  if (VA.needsCustom()) {
2364  // f64 and vector types are split into multiple registers or
2365  // register/stack-slot combinations. The types will not match
2366  // the registers; give up on memory f64 refs until we figure
2367  // out what to do about this.
2368  if (!VA.isRegLoc())
2369  return false;
2370  if (!ArgLocs[++i].isRegLoc())
2371  return false;
2372  if (RegVT == MVT::v2f64) {
2373  if (!ArgLocs[++i].isRegLoc())
2374  return false;
2375  if (!ArgLocs[++i].isRegLoc())
2376  return false;
2377  }
2378  } else if (!VA.isRegLoc()) {
2379  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2380  MFI, MRI, TII))
2381  return false;
2382  }
2383  }
2384  }
2385 
2386  const MachineRegisterInfo &MRI = MF.getRegInfo();
2387  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2388  return false;
2389  }
2390 
2391  return true;
2392 }
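// Illustrative sketch (hypothetical IR, not from this file) of a call that can
// satisfy the checks above: same calling convention as the caller, no sret, no
// split byval, and no stack-passed arguments, so it may become a sibcall:
//
//   declare i32 @callee(i32)
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }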
2393 
2394 bool
2395 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2396  MachineFunction &MF, bool isVarArg,
2397  const SmallVectorImpl<ISD::OutputArg> &Outs,
2398  LLVMContext &Context) const {
2399  SmallVector<CCValAssign, 16> RVLocs;
2400  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2401  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2402 }
2403 
2404 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2405  const SDLoc &DL, SelectionDAG &DAG) {
2406  const MachineFunction &MF = DAG.getMachineFunction();
2407  const Function *F = MF.getFunction();
2408 
2409  StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2410 
2411  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2412  // version of the "preferred return address". These offsets affect the return
2413  // instruction if this is a return from PL1 without hypervisor extensions.
2414  // IRQ/FIQ: +4 "subs pc, lr, #4"
2415  // SWI: 0 "subs pc, lr, #0"
2416  // ABORT: +4 "subs pc, lr, #4"
2417  // UNDEF: +4/+2 "subs pc, lr, #0"
2418  // UNDEF varies depending on where the exception came from ARM or Thumb
2419  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2420 
2421  int64_t LROffset;
2422  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2423  IntKind == "ABORT")
2424  LROffset = 4;
2425  else if (IntKind == "SWI" || IntKind == "UNDEF")
2426  LROffset = 0;
2427  else
2428  report_fatal_error("Unsupported interrupt attribute. If present, value "
2429  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2430 
2431  RetOps.insert(RetOps.begin() + 1,
2432  DAG.getConstant(LROffset, DL, MVT::i32, false));
2433 
2434  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2435 }
2436 
2437 SDValue
2438 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2439  bool isVarArg,
2440  const SmallVectorImpl<ISD::OutputArg> &Outs,
2441  const SmallVectorImpl<SDValue> &OutVals,
2442  const SDLoc &dl, SelectionDAG &DAG) const {
2443 
2444  // CCValAssign - represent the assignment of the return value to a location.
2445  SmallVector<CCValAssign, 16> RVLocs;
2446 
2447  // CCState - Info about the registers and stack slots.
2448  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2449  *DAG.getContext());
2450 
2451  // Analyze outgoing return values.
2452  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2453 
2454  SDValue Flag;
2455  SmallVector<SDValue, 4> RetOps;
2456  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2457  bool isLittleEndian = Subtarget->isLittle();
2458 
2459  MachineFunction &MF = DAG.getMachineFunction();
2460  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2461  AFI->setReturnRegsCount(RVLocs.size());
2462 
2463  // Copy the result values into the output registers.
2464  for (unsigned i = 0, realRVLocIdx = 0;
2465  i != RVLocs.size();
2466  ++i, ++realRVLocIdx) {
2467  CCValAssign &VA = RVLocs[i];
2468  assert(VA.isRegLoc() && "Can only return in registers!");
2469 
2470  SDValue Arg = OutVals[realRVLocIdx];
2471 
2472  switch (VA.getLocInfo()) {
2473  default: llvm_unreachable("Unknown loc info!");
2474  case CCValAssign::Full: break;
2475  case CCValAssign::BCvt:
2476  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2477  break;
2478  }
2479 
2480  if (VA.needsCustom()) {
2481  if (VA.getLocVT() == MVT::v2f64) {
2482  // Extract the first half and return it in two registers.
2483  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2484  DAG.getConstant(0, dl, MVT::i32));
2485  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2486  DAG.getVTList(MVT::i32, MVT::i32), Half);
2487 
2488  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2489  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2490  Flag);
2491  Flag = Chain.getValue(1);
2492  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2493  VA = RVLocs[++i]; // skip ahead to next loc
2494  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2495  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2496  Flag);
2497  Flag = Chain.getValue(1);
2498  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2499  VA = RVLocs[++i]; // skip ahead to next loc
2500 
2501  // Extract the 2nd half and fall through to handle it as an f64 value.
2502  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2503  DAG.getConstant(1, dl, MVT::i32));
2504  }
2505  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2506  // available.
2507  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2508  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2509  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2510  fmrrd.getValue(isLittleEndian ? 0 : 1),
2511  Flag);
2512  Flag = Chain.getValue(1);
2513  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2514  VA = RVLocs[++i]; // skip ahead to next loc
2515  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2516  fmrrd.getValue(isLittleEndian ? 1 : 0),
2517  Flag);
2518  } else
2519  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2520 
2521  // Guarantee that all emitted copies are
2522  // stuck together, avoiding something bad.
2523  Flag = Chain.getValue(1);
2524  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2525  }
2526  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2527  const MCPhysReg *I =
2528  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2529  if (I) {
2530  for (; *I; ++I) {
2531  if (ARM::GPRRegClass.contains(*I))
2532  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2533  else if (ARM::DPRRegClass.contains(*I))
2534  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2535  else
2536  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2537  }
2538  }
2539 
2540  // Update chain and glue.
2541  RetOps[0] = Chain;
2542  if (Flag.getNode())
2543  RetOps.push_back(Flag);
2544 
2545  // CPUs which aren't M-class use a special sequence to return from
2546  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2547  // though we use "subs pc, lr, #N").
2548  //
2549  // M-class CPUs actually use a normal return sequence with a special
2550  // (hardware-provided) value in LR, so the normal code path works.
2551  if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2552  !Subtarget->isMClass()) {
2553  if (Subtarget->isThumb1Only())
2554  report_fatal_error("interrupt attribute is not supported in Thumb1");
2555  return LowerInterruptReturn(RetOps, dl, DAG);
2556  }
2557 
2558  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2559 }
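// Illustrative note (AAPCS soft-float return assumed, not from this file): for
// "ret double %d" the loop above emits ARMISD::VMOVRRD to move the f64 bit
// pattern into a GPR pair, so the value comes back in r0/r1, with r0 holding
// the low word on little-endian targets and the high word on big-endian ones.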
2560 
2561 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2562  if (N->getNumValues() != 1)
2563  return false;
2564  if (!N->hasNUsesOfValue(1, 0))
2565  return false;
2566 
2567  SDValue TCChain = Chain;
2568  SDNode *Copy = *N->use_begin();
2569  if (Copy->getOpcode() == ISD::CopyToReg) {
2570  // If the copy has a glue operand, we conservatively assume it isn't safe to
2571  // perform a tail call.
2572  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2573  return false;
2574  TCChain = Copy->getOperand(0);
2575  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2576  SDNode *VMov = Copy;
2577  // f64 returned in a pair of GPRs.
2578  SmallPtrSet<SDNode*, 2> Copies;
2579  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2580  UI != UE; ++UI) {
2581  if (UI->getOpcode() != ISD::CopyToReg)
2582  return false;
2583  Copies.insert(*UI);
2584  }
2585  if (Copies.size() > 2)
2586  return false;
2587 
2588  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2589  UI != UE; ++UI) {
2590  SDValue UseChain = UI->getOperand(0);
2591  if (Copies.count(UseChain.getNode()))
2592  // Second CopyToReg
2593  Copy = *UI;
2594  else {
2595  // We are at the top of this chain.
2596  // If the copy has a glue operand, we conservatively assume it
2597  // isn't safe to perform a tail call.
2598  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2599  return false;
2600  // First CopyToReg
2601  TCChain = UseChain;
2602  }
2603  }
2604  } else if (Copy->getOpcode() == ISD::BITCAST) {
2605  // f32 returned in a single GPR.
2606  if (!Copy->hasOneUse())
2607  return false;
2608  Copy = *Copy->use_begin();
2609  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2610  return false;
2611  // If the copy has a glue operand, we conservatively assume it isn't safe to
2612  // perform a tail call.
2613  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2614  return false;
2615  TCChain = Copy->getOperand(0);
2616  } else {
2617  return false;
2618  }
2619 
2620  bool HasRet = false;
2621  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2622  UI != UE; ++UI) {
2623  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2624  UI->getOpcode() != ARMISD::INTRET_FLAG)
2625  return false;
2626  HasRet = true;
2627  }
2628 
2629  if (!HasRet)
2630  return false;
2631 
2632  Chain = TCChain;
2633  return true;
2634 }
2635 
2636 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2637  if (!Subtarget->supportsTailCall())
2638  return false;
2639 
2640  auto Attr =
2641  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2642  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2643  return false;
2644 
2645  return true;
2646 }
2647 
2648 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2649 // values first, and pass the low and high parts through.
2650 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2651  SDLoc DL(Op);
2652  SDValue WriteValue = Op->getOperand(2);
2653 
2654  // This function is only supposed to be called for i64 type argument.
2655  assert(WriteValue.getValueType() == MVT::i64
2656  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2657 
2658  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2659  DAG.getConstant(0, DL, MVT::i32));
2660  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2661  DAG.getConstant(1, DL, MVT::i32));
2662  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2663  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2664 }
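// Illustrative use (hypothetical IR, not from this file): an i64 register
// write such as
//   call void @llvm.write_register.i64(metadata !0, i64 %v)
// reaches this hook and is rebuilt as a single ISD::WRITE_REGISTER node whose
// value operands are the two i32 halves extracted from %v above.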
2665 
2666 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2667 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2668 // one of the above mentioned nodes. It has to be wrapped because otherwise
2669 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2670 // be used to form addressing mode. These wrapped nodes will be selected
2671 // into MOVi.
2672 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2673  SelectionDAG &DAG) const {
2674  EVT PtrVT = Op.getValueType();
2675  // FIXME there is no actual debug info here
2676  SDLoc dl(Op);
2677  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2678  SDValue Res;
2679 
2680  // When generating execute-only code Constant Pools must be promoted to the
2681  // global data section. It's a bit ugly that we can't share them across basic
2682  // blocks, but this way we guarantee that execute-only behaves correctly with
2683  // position-independent addressing modes.
2684  if (Subtarget->genExecuteOnly()) {
2685  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2686  auto T = const_cast<Type*>(CP->getType());
2687  auto C = const_cast<Constant*>(CP->getConstVal());
2688  auto M = const_cast<Module*>(DAG.getMachineFunction().
2689  getFunction()->getParent());
2690  auto GV = new GlobalVariable(
2691  *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2692  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2693  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2694  Twine(AFI->createPICLabelUId())
2695  );
2696  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2697  dl, PtrVT);
2698  return LowerGlobalAddress(GA, DAG);
2699  }
2700 
2701  if (CP->isMachineConstantPoolEntry())
2702  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2703  CP->getAlignment());
2704  else
2705  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2706  CP->getAlignment());
2707  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2708 }
2709 
2710 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2711  return MachineJumpTableInfo::EK_Inline;
2712 }
2713 
2714 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2715  SelectionDAG &DAG) const {
2716  MachineFunction &MF = DAG.getMachineFunction();
2717  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2718  unsigned ARMPCLabelIndex = 0;
2719  SDLoc DL(Op);
2720  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2721  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2722  SDValue CPAddr;
2723  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2724  if (!IsPositionIndependent) {
2725  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2726  } else {
2727  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2728  ARMPCLabelIndex = AFI->createPICLabelUId();
2729  ARMConstantPoolValue *CPV =
2730  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2731  ARMCP::CPBlockAddress, PCAdj);
2732  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2733  }
2734  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2735  SDValue Result = DAG.getLoad(
2736  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2737  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2738  if (!IsPositionIndependent)
2739  return Result;
2740  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2741  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2742 }
2743 
2744 /// \brief Convert a TLS address reference into the correct sequence of loads
2745 /// and calls to compute the variable's address for Darwin, and return an
2746 /// SDValue containing the final node.
2747 
2748 /// Darwin only has one TLS scheme which must be capable of dealing with the
2749 /// fully general situation, in the worst case. This means:
2750 /// + "extern __thread" declaration.
2751 /// + Defined in a possibly unknown dynamic library.
2752 ///
2753 /// The general system is that each __thread variable has a [3 x i32] descriptor
2754 /// which contains information used by the runtime to calculate the address. The
2755 /// only part of this the compiler needs to know about is the first word, which
2756 /// contains a function pointer that must be called with the address of the
2757 /// entire descriptor in "r0".
2758 ///
2759 /// Since this descriptor may be in a different unit, in general access must
2760 /// proceed along the usual ARM rules. A common sequence to produce is:
2761 ///
2762 /// movw rT1, :lower16:_var$non_lazy_ptr
2763 /// movt rT1, :upper16:_var$non_lazy_ptr
2764 /// ldr r0, [rT1]
2765 /// ldr rT2, [r0]
2766 /// blx rT2
2767 /// [...address now in r0...]
2768 SDValue
2769 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2770  SelectionDAG &DAG) const {
2771  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
2772  SDLoc DL(Op);
2773 
2774  // First step is to get the address of the actual global symbol. This is where
2775  // the TLS descriptor lives.
2776  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2777 
2778  // The first entry in the descriptor is a function pointer that we must call
2779  // to obtain the address of the variable.
2780  SDValue Chain = DAG.getEntryNode();
2781  SDValue FuncTLVGet = DAG.getLoad(
2782  MVT::i32, DL, Chain, DescAddr,
2783  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2784  /* Alignment = */ 4,
2785  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2786  MachineMemOperand::MOInvariant);
2787  Chain = FuncTLVGet.getValue(1);
2788 
2789  MachineFunction &F = DAG.getMachineFunction();
2790  MachineFrameInfo &MFI = F.getFrameInfo();
2791  MFI.setAdjustsStack(true);
2792 
2793  // TLS calls preserve all registers except those that absolutely must be
2794  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2795  // silly).
2796  auto TRI =
2797  getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2798  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2799  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2800 
2801  // Finally, we can make the call. This is just a degenerate version of a
2802  // normal ARM call node: r0 takes the address of the descriptor, and
2803  // returns the address of the variable in this thread.
2804  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2805  Chain =
2806  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2807  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2808  DAG.getRegisterMask(Mask), Chain.getValue(1));
2809  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2810 }
2811 
2812 SDValue
2813 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2814  SelectionDAG &DAG) const {
2815  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2816 
2817  SDValue Chain = DAG.getEntryNode();
2818  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2819  SDLoc DL(Op);
2820 
2821  // Load the current TEB (thread environment block)
2822  SDValue Ops[] = {Chain,
2823  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2824  DAG.getConstant(15, DL, MVT::i32),
2825  DAG.getConstant(0, DL, MVT::i32),
2826  DAG.getConstant(13, DL, MVT::i32),
2827  DAG.getConstant(0, DL, MVT::i32),
2828  DAG.getConstant(2, DL, MVT::i32)};
2829  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2830  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2831 
2832  SDValue TEB = CurrentTEB.getValue(0);
2833  Chain = CurrentTEB.getValue(1);
2834 
2835  // Load the ThreadLocalStoragePointer from the TEB
2836  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2837  SDValue TLSArray =
2838  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2839  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2840 
2841  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2842  // offset into the TLSArray.
2843 
2844  // Load the TLS index from the C runtime
2845  SDValue TLSIndex =
2846  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2847  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2848  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2849 
2850  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2851  DAG.getConstant(2, DL, MVT::i32));
2852  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2853  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2854  MachinePointerInfo());
2855 
2856  // Get the offset of the start of the .tls section (section base)
2857  const auto *GA = cast<GlobalAddressSDNode>(Op);
2858  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2859  SDValue Offset = DAG.getLoad(
2860  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2861  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2862  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2863 
2864  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2865 }
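// Rough shape of the sequence built above (illustrative only; register names
// are arbitrary and the final offset is loaded from a constant pool entry):
//   mrc p15, #0, rT, c13, c0, #2   ; read the TEB pointer
//   ldr rA, [rT, #0x2c]            ; ThreadLocalStoragePointer (TLS array)
//   ldr rI, _tls_index             ; this module's TLS index from the CRT
//   ldr rB, [rA, rI, lsl #2]       ; base of this module's TLS block
//   ldr rO, <SECREL offset of var> ; section-relative offset of the variable
//   add r0, rB, rO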
2866 
2867 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2868 SDValue
2869 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2870  SelectionDAG &DAG) const {
2871  SDLoc dl(GA);
2872  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2873  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2874  MachineFunction &MF = DAG.getMachineFunction();
2875  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2876  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2877  ARMConstantPoolValue *CPV =
2878  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2879  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2880  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2881  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2882  Argument = DAG.getLoad(
2883  PtrVT, dl, DAG.getEntryNode(), Argument,
2884  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2885  SDValue Chain = Argument.getValue(1);
2886 
2887  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2888  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2889 
2890  // call __tls_get_addr.
2891  ArgListTy Args;
2892  ArgListEntry Entry;
2893  Entry.Node = Argument;
2894  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2895  Args.push_back(Entry);
2896 
2897  // FIXME: is there useful debug info available here?
2898  TargetLowering::CallLoweringInfo CLI(DAG);
2899  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2900  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2901  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2902 
2903  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2904  return CallResult.first;
2905 }
2906 
2907 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2908 // "local exec" model.
2909 SDValue
2910 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2911  SelectionDAG &DAG,
2912  TLSModel::Model model) const {
2913  const GlobalValue *GV = GA->getGlobal();
2914  SDLoc dl(GA);
2915  SDValue Offset;
2916  SDValue Chain = DAG.getEntryNode();
2917  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2918  // Get the Thread Pointer
2919  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2920 
2921  if (model == TLSModel::InitialExec) {
2922  MachineFunction &MF = DAG.getMachineFunction();
2923  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2924  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2925  // Initial exec model.
2926  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2927  ARMConstantPoolValue *CPV =
2928  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2929  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2930  true);
2931  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2932  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2933  Offset = DAG.getLoad(
2934  PtrVT, dl, Chain, Offset,
2935  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2936  Chain = Offset.getValue(1);
2937 
2938  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2939  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2940 
2941  Offset = DAG.getLoad(
2942  PtrVT, dl, Chain, Offset,
2943  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2944  } else {
2945  // local exec model
2946  assert(model == TLSModel::LocalExec);
2947  ARMConstantPoolValue *CPV =
2948  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2949  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2950  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2951  Offset = DAG.getLoad(
2952  PtrVT, dl, Chain, Offset,
2953  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2954  }
2955 
2956  // The address of the thread local variable is the add of the thread
2957  // pointer with the offset of the variable.
2958  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2959 }
2960 
2961 SDValue
2962 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2963  if (Subtarget->isTargetDarwin())
2964  return LowerGlobalTLSAddressDarwin(Op, DAG);
2965 
2966  if (Subtarget->isTargetWindows())
2967  return LowerGlobalTLSAddressWindows(Op, DAG);
2968 
2969  // TODO: implement the "local dynamic" model
2970  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2971  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2972  if (DAG.getTarget().Options.EmulatedTLS)
2973  return LowerToTLSEmulatedModel(GA, DAG);
2974 
2975  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2976 
2977  switch (model) {
2978  case TLSModel::GeneralDynamic:
2979  case TLSModel::LocalDynamic:
2980  return LowerToTLSGeneralDynamicModel(GA, DAG);
2981  case TLSModel::InitialExec:
2982  case TLSModel::LocalExec:
2983  return LowerToTLSExecModels(GA, DAG, model);
2984  }
2985  llvm_unreachable("bogus TLS model");
2986 }
2987 
2988 /// Return true if all users of V are within function F, looking through
2989 /// ConstantExprs.
2990 static bool allUsersAreInFunction(const Value *V, const Function *F) {
2991  SmallVector<const User*,4> Worklist;
2992  for (auto *U : V->users())
2993  Worklist.push_back(U);
2994  while (!Worklist.empty()) {
2995  auto *U = Worklist.pop_back_val();
2996  if (isa<ConstantExpr>(U)) {
2997  for (auto *UU : U->users())
2998  Worklist.push_back(UU);
2999  continue;
3000  }
3001 
3002  auto *I = dyn_cast<Instruction>(U);
3003  if (!I || I->getParent()->getParent() != F)
3004  return false;
3005  }
3006  return true;
3007 }
3008 
3009 /// Return true if all users of V are within some (any) function, looking through
3010 /// ConstantExprs. In other words, are there any global constant users?
3011 static bool allUsersAreInFunctions(const Value *V) {
3012  SmallVector<const User*,4> Worklist;
3013  for (auto *U : V->users())
3014  Worklist.push_back(U);
3015  while (!Worklist.empty()) {
3016  auto *U = Worklist.pop_back_val();
3017  if (isa<ConstantExpr>(U)) {
3018  for (auto *UU : U->users())
3019  Worklist.push_back(UU);
3020  continue;
3021  }
3022 
3023  if (!isa<Instruction>(U))
3024  return false;
3025  }
3026  return true;
3027 }
3028 
3029 // Return true if T is an integer, float or an array/vector of either.
3030 static bool isSimpleType(Type *T) {
3031  if (T->isIntegerTy() || T->isFloatingPointTy())
3032  return true;
3033  Type *SubT = nullptr;
3034  if (T->isArrayTy())
3035  SubT = T->getArrayElementType();
3036  else if (T->isVectorTy())
3037  SubT = T->getVectorElementType();
3038  else
3039  return false;
3040  return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3041 }
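// For illustration: i32, double, [16 x i8] and <4 x float> all count as simple
// here, while pointer types, structs, and aggregates of pointers do not, which
// keeps relocations out of promoted constant pool entries (see below).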
3042 
3043 static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3044  EVT PtrVT, const SDLoc &dl) {
3045  // If we're creating a pool entry for a constant global with unnamed address,
3046  // and the global is small enough, we can emit it inline into the constant pool
3047  // to save ourselves an indirection.
3048  //
3049  // This is a win if the constant is only used in one function (so it doesn't
3050  // need to be duplicated) or duplicating the constant wouldn't increase code
3051  // size (implying the constant is no larger than 4 bytes).
3052  const Function *F = DAG.getMachineFunction().getFunction();
3053 
3054  // We rely on this decision to inline being idempotent and unrelated to the
3055  // use-site. We know that if we inline a variable at one use site, we'll
3056  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3057  // doesn't know about this optimization, so bail out if it's enabled, else
3058  // we could decide to inline here (and thus never emit the GV) but require
3059  // the GV from fast-isel generated code.
3060  if (!EnableConstpoolPromotion ||
3061  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3062  return SDValue();
3063 
3064  auto *GVar = dyn_cast<GlobalVariable>(GV);
3065  if (!GVar || !GVar->hasInitializer() ||
3066  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3067  !GVar->hasLocalLinkage())
3068  return SDValue();
3069 
3070  // Ensure that we don't try and inline any type that contains pointers. If
3071  // we inline a value that contains relocations, we move the relocations from
3072  // .data to .text which is not ideal.
3073  auto *Init = GVar->getInitializer();
3074  if (!isSimpleType(Init->getType()))
3075  return SDValue();
3076 
3077  // The constant islands pass can only really deal with alignment requests
3078  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3079  // any type wanting greater alignment requirements than 4 bytes. We also
3080  // can only promote constants that are multiples of 4 bytes in size or
3081  // are paddable to a multiple of 4. Currently we only try and pad constants
3082  // that are strings for simplicity.
3083  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3084  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3085  unsigned Align = GVar->getAlignment();
3086  unsigned RequiredPadding = 4 - (Size % 4);
3087  bool PaddingPossible =
3088  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3089  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3090  Size == 0)
3091  return SDValue();
3092 
3093  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3094  MachineFunction &MF = DAG.getMachineFunction();
3095  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3096 
3097  // We can't bloat the constant pool too much, else the ConstantIslands pass
3098  // may fail to converge. If we haven't promoted this global yet (it may have
3099  // multiple uses), and promoting it would increase the constant pool size (Sz
3100  // > 4), ensure we have space to do so up to MaxTotal.
3101  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3102  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3103  ConstpoolPromotionMaxTotal)
3104  return SDValue();
3105 
3106  // This is only valid if all users are in a single function OR it has users
3107  // in multiple functions but it is no larger than a pointer. We also check if
3108  // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3109  // address taken.
3110  if (!allUsersAreInFunction(GVar, F) &&
3111  !(Size <= 4 && allUsersAreInFunctions(GVar)))
3112  return SDValue();
3113 
3114  // We're going to inline this global. Pad it out if needed.
3115  if (RequiredPadding != 4) {
3116  StringRef S = CDAInit->getAsString();
3117 
3118  SmallVector<uint8_t,16> V(S.size());
3119  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3120  while (RequiredPadding--)
3121  V.push_back(0);
3122  Init = ConstantDataArray::get(*DAG.getContext(), V);
3123  }
3124 
3125  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3126  SDValue CPAddr =
3127  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3128  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3129  AFI->markGlobalAsPromotedToConstantPool(GVar);
3130  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3131  PaddedSize - 4);
3132  }
3133  ++NumConstpoolPromoted;
3134  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3135 }
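// Illustrative effect (hypothetical global, not from this file): a small
// internal constant such as
//   @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
// can be emitted directly as a constant pool entry (padded to a multiple of 4
// bytes), so each reference becomes a single pc-relative load instead of first
// loading the address of a separate global.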
3136 
3137 static bool isReadOnly(const GlobalValue *GV) {
3138  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3139  GV = GA->getBaseObject();
3140  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3141  isa<Function>(GV);
3142 }
3143 
3144 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3145  SelectionDAG &DAG) const {
3146  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3147  default: llvm_unreachable("unknown object format");
3148  case Triple::COFF:
3149  return LowerGlobalAddressWindows(Op, DAG);
3150  case Triple::ELF:
3151  return LowerGlobalAddressELF(Op, DAG);
3152  case Triple::MachO:
3153  return LowerGlobalAddressDarwin(Op, DAG);
3154  }
3155 }
3156 
3157 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3158  SelectionDAG &DAG) const {
3159  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3160  SDLoc dl(Op);
3161  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3162  const TargetMachine &TM = getTargetMachine();
3163  bool IsRO = isReadOnly(GV);
3164 
3165  // promoteToConstantPool only if not generating XO text section
3166  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3167  if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3168  return V;
3169 
3170  if (isPositionIndependent()) {
3171  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3172 
3173  MachineFunction &MF = DAG.getMachineFunction();
3174  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3175  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3176  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3177  SDLoc dl(Op);
3178  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3179  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
3180  GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
3181  UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
3182  /*AddCurrentAddress=*/UseGOT_PREL);
3183  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3184  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3185  SDValue Result = DAG.getLoad(
3186  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3187  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3188  SDValue Chain = Result.getValue(1);
3189  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3190  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3191  if (UseGOT_PREL)
3192  Result =
3193  DAG.getLoad(PtrVT, dl, Chain, Result,
3194  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3195  return Result;
3196  } else if (Subtarget->isROPI() && IsRO) {
3197  // PC-relative.
3198  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3199  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3200  return Result;
3201  } else if (Subtarget->isRWPI() && !IsRO) {
3202  // SB-relative.
3203  SDValue RelAddr;
3204  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3205  ++NumMovwMovt;
3206  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3207  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3208  } else { // use literal pool for address constant
3209  ARMConstantPoolValue *CPV =
3210  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3211  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3212  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3213  RelAddr = DAG.getLoad(
3214  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3215  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3216  }
3217  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3218  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3219  return Result;
3220  }
3221 
3222  // If we have T2 ops, we can materialize the address directly via movt/movw
3223  // pair. This is always cheaper.
3224  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3225  ++NumMovwMovt;
3226  // FIXME: Once remat is capable of dealing with instructions with register
3227  // operands, expand this into two nodes.
3228  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3229  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3230  } else {
3231  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3232  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3233  return DAG.getLoad(
3234  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3235  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3236  }
3237 }
3238 
3239 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3240  SelectionDAG &DAG) const {
3241  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3242  "ROPI/RWPI not currently supported for Darwin");
3243  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3244  SDLoc dl(Op);
3245  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3246 
3247  if (Subtarget->useMovt(DAG.getMachineFunction()))
3248  ++NumMovwMovt;
3249 
3250  // FIXME: Once remat is capable of dealing with instructions with register
3251  // operands, expand this into multiple nodes
3252  unsigned Wrapper =
3253  Subtarget->useMovt(DAG.getMachineFunction()) ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3254 
3255  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3256  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3257 
3258  if (Subtarget->isGVIndirectSymbol(GV))
3259  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3260  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3261  return Result;
3262 }
3263 
3264 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3265  SelectionDAG &DAG) const {
3266  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3267  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3268  "Windows on ARM expects to use movw/movt");
3269  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3270  "ROPI/RWPI not currently supported for Windows");
3271 
3272  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3273  const ARMII::TOF TargetFlags =
3274  (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3275  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3276  SDValue Result;
3277  SDLoc DL(Op);
3278 
3279  ++NumMovwMovt;
3280 
3281  // FIXME: Once remat is capable of dealing with instructions with register
3282  // operands, expand this into two nodes.
3283  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3284  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3285  TargetFlags));
3286  if (GV->hasDLLImportStorageClass())
3287  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3288  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3289  return Result;
3290 }
3291 
3292 SDValue
3293 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3294  SDLoc dl(Op);
3295  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3296  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3297  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3298  Op.getOperand(1), Val);
3299 }
3300 
3301 SDValue
3302 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3303  SDLoc dl(Op);
3304  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3305  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3306 }
3307 
3308 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3309  SelectionDAG &DAG) const {
3310  SDLoc dl(Op);
3311  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3312  Op.getOperand(0));
3313 }
3314 
3315 SDValue
3316 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3317  const ARMSubtarget *Subtarget) const {
3318  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3319  SDLoc dl(Op);
3320  switch (IntNo) {
3321  default: return SDValue(); // Don't custom lower most intrinsics.
3322  case Intrinsic::thread_pointer: {
3323  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3324  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3325  }
3326  case Intrinsic::eh_sjlj_lsda: {
3327  MachineFunction &MF = DAG.getMachineFunction();
3328  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3329  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3330  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3331  SDValue CPAddr;
3332  bool IsPositionIndependent = isPositionIndependent();
3333  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
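 // The constant-pool entry is PC-relative; PCAdj compensates for the PC
 // reading ahead of the PIC label by 4 bytes in Thumb mode and 8 in ARM mode.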
3334  ARMConstantPoolValue *CPV =
3335  ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3336  ARMCP::CPLSDA, PCAdj);
3337  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3338  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3339  SDValue Result = DAG.getLoad(
3340  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3341  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3342 
3343  if (IsPositionIndependent) {
3344  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3345  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3346  }
3347  return Result;
3348  }
3349  case Intrinsic::arm_neon_vabs:
3350  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3351  Op.getOperand(1));
3352  case Intrinsic::arm_neon_vmulls:
3353  case Intrinsic::arm_neon_vmullu: {
3354  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3355  ? ARMISD::VMULLs : ARMISD::VMULLu;
3356  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3357  Op.getOperand(1), Op.getOperand(2));
3358  }
3359  case Intrinsic::arm_neon_vminnm:
3360  case Intrinsic::arm_neon_vmaxnm: {
3361  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3362  ? ISD::FMINNUM : ISD::FMAXNUM;
3363  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3364  Op.getOperand(1), Op.getOperand(2));
3365  }
3366  case Intrinsic::arm_neon_vminu:
3367  case Intrinsic::arm_neon_vmaxu: {
3368  if (Op.getValueType().isFloatingPoint())
3369  return SDValue();
3370  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3371  ? ISD::UMIN : ISD::UMAX;
3372  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3373  Op.getOperand(1), Op.getOperand(2));
3374  }
3375  case Intrinsic::arm_neon_vmins:
3376  case Intrinsic::arm_neon_vmaxs: {
3377  // v{min,max}s is overloaded between signed integers and floats.
3378  if (!Op.getValueType().isFloatingPoint()) {
3379  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3380  ? ISD::SMIN : ISD::SMAX;
3381  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3382  Op.getOperand(1), Op.getOperand(2));
3383  }
3384  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3385  ? ISD::FMINNAN : ISD::FMAXNAN;
3386  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3387  Op.getOperand(1), Op.getOperand(2));
3388  }
3389  case Intrinsic::arm_neon_vtbl1:
3390  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3391  Op.getOperand(1), Op.getOperand(2));
3392  case Intrinsic::arm_neon_vtbl2:
3393  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3394  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3395  }
3396 }
3397 
3398 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3399  const ARMSubtarget *Subtarget) {
3400  SDLoc dl(Op);
3401  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3402  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3403  if (SSID == SyncScope::SingleThread)
3404  return Op;
3405 
3406  if (!Subtarget->hasDataBarrier()) {
3407  // Some ARMv6 CPUs can support data barriers with an mcr instruction.
3408  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3409  // here.
3410  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3411  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3412  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3413  DAG.getConstant(0, dl, MVT::i32));
3414  }
3415 
3416  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3417  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3418  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3419  if (Subtarget->isMClass()) {
3420  // Only a full system barrier exists in the M-class architectures.
3421  Domain = ARM_MB::SY;
3422  } else if (Subtarget->preferISHSTBarriers() &&
3423  Ord == AtomicOrdering::Release) {
3424  // Swift happens to implement ISHST barriers in a way that's compatible with
3425  // Release semantics but weaker than ISH so we'd be fools not to use
3426  // it. Beware: other processors probably don't!
3427  Domain = ARM_MB::ISHST;
3428  }
3429 
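 // Lower the fence to a DMB with the chosen domain, via the arm_dmb intrinsic.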
3430  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3431  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3432  DAG.getConstant(Domain, dl, MVT::i32));
3433 }
3434 
3435 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3436  const ARMSubtarget *Subtarget) {
3437  // ARM pre v5TE and Thumb1 do not have preload instructions.
3438  if (!(Subtarget->isThumb2() ||
3439  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3440  // Just preserve the chain.
3441  return Op.getOperand(0);
3442 
3443  SDLoc dl(Op);
3444  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3445  if (!isRead &&
3446  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3447  // ARMv7 with MP extension has PLDW.
3448  return Op.getOperand(0);
3449 
3450  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3451  if (Subtarget->isThumb()) {
3452  // Invert the bits.
3453  isRead = ~isRead & 1;
3454  isData = ~isData & 1;
3455  }
3456 
3457  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3458  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3459  DAG.getConstant(isData, dl, MVT::i32));
3460 }
3461 
3462 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3463  MachineFunction &MF = DAG.getMachineFunction();
3464  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3465 
3466  // vastart just stores the address of the VarArgsFrameIndex slot into the
3467  // memory location argument.
3468  SDLoc dl(Op);
3469  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3470  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3471  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3472  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3473  MachinePointerInfo(SV));
3474 }
3475 
3476 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3477  CCValAssign &NextVA,
3478  SDValue &Root,
3479  SelectionDAG &DAG,
3480  const SDLoc &dl) const {
3481  MachineFunction &MF = DAG.getMachineFunction();
3482  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3483 
3484  const TargetRegisterClass *RC;
3485  if (AFI->isThumb1OnlyFunction())
3486  RC = &ARM::tGPRRegClass;
3487  else
3488  RC = &ARM::GPRRegClass;
3489 
3490  // Transform the arguments stored in physical registers into virtual ones.
3491  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3492  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3493 
3494  SDValue ArgValue2;
3495  if (NextVA.isMemLoc()) {
3496  MachineFrameInfo &MFI = MF.getFrameInfo();
3497  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3498 
3499  // Create load node to retrieve arguments from the stack.
3500  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3501  ArgValue2 = DAG.getLoad(
3502  MVT::i32, dl, Root, FIN,
3503  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3504  } else {
3505  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3506  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3507  }
3508  if (!Subtarget->isLittle())
3509  std::swap (ArgValue, ArgValue2);
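 // Reassemble the f64 from its two i32 halves (swapped above for big-endian
 // targets).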
3510  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3511 }
3512 
3513 // The remaining GPRs hold either the beginning of variable-argument
3514 // data, or the beginning of an aggregate passed by value (usually
3515 // byval). Either way, we allocate stack slots adjacent to the data
3516 // provided by our caller, and store the unallocated registers there.
3517 // If this is a variadic function, the va_list pointer will begin with
3518 // these values; otherwise, this reassembles a (byval) structure that
3519 // was split between registers and memory.
3520 // Return: The frame index the registers were stored into.
3521 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3522  const SDLoc &dl, SDValue &Chain,
3523  const Value *OrigArg,
3524  unsigned InRegsParamRecordIdx,
3525  int ArgOffset, unsigned ArgSize) const {
3526  // Currently, two use cases are possible:
3527  // Case #1. Non-var-args function, and we meet the first byval parameter.
3528  // Set up the first unallocated register as the first byval register;
3529  // eat all remaining registers
3530  // (these two actions are performed by the HandleByVal method).
3531  // Then, here, we initialize the stack frame with
3532  // "store-reg" instructions.
3533  // Case #2. Var-args function that doesn't contain byval parameters.
3534  // The same: eat all remaining unallocated registers and
3535  // initialize the stack frame.
3536 
3537  MachineFunction &MF = DAG.getMachineFunction();
3538  MachineFrameInfo &MFI = MF.getFrameInfo();
3539  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3540  unsigned RBegin, REnd;
3541  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3542  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3543  } else {
3544  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3545  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3546  REnd = ARM::R4;
3547  }
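 // [RBegin, REnd) is now the half-open range of argument GPRs (within r0-r3)
 // that still need to be spilled to the stack slots created below.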
3548 
3549  if (REnd != RBegin)
3550  ArgOffset = -4 * (ARM::R4 - RBegin);
3551 
3552  auto PtrVT = getPointerTy(DAG.getDataLayout());
3553  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3554  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3555 
3556  SmallVector<SDValue, 4> MemOps;
3557  const TargetRegisterClass *RC =
3558  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3559 
3560  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3561  unsigned VReg = MF.addLiveIn(Reg, RC);
3562  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3563  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3564  MachinePointerInfo(OrigArg, 4 * i));
3565  MemOps.push_back(Store);
3566  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3567  }
3568 
3569  if (!MemOps.empty())
3570  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3571  return FrameIndex;
3572 }
3573 
3574 // Set up the stack frame that the va_list pointer will start from.
3575 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3576  const SDLoc &dl, SDValue &Chain,
3577  unsigned ArgOffset,
3578  unsigned TotalArgRegsSaveSize,
3579  bool ForceMutable) const {
3580  MachineFunction &MF = DAG.getMachineFunction();
3581  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3582 
3583  // Try to store any remaining integer argument regs
3584  // to their spots on the stack so that they may be loaded by dereferencing
3585  // the result of va_next.
3586  // If there are no regs to be stored, just point the address past the last
3587  // argument passed via the stack.
3588  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3589  CCInfo.getInRegsParamsCount(),
3590  CCInfo.getNextStackOffset(), 4);
3591  AFI->setVarArgsFrameIndex(FrameIndex);
3592 }
3593 
3594 SDValue ARMTargetLowering::LowerFormalArguments(
3595  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3596  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3597  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3598  MachineFunction &MF = DAG.getMachineFunction();
3599  MachineFrameInfo &MFI = MF.getFrameInfo();
3600 
3601  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3602 
3603  // Assign locations to all of the incoming arguments.
3604  SmallVector<CCValAssign, 16> ArgLocs;
3605  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3606  *DAG.getContext());
3607  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3608 
3609  SmallVector<SDValue, 16> ArgValues;
3610  SDValue ArgValue;
3611  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3612  unsigned CurArgIdx = 0;
3613 
3614  // Initially ArgRegsSaveSize is zero.
3615  // Then we increase this value each time we meet a byval parameter.
3616  // We also increase this value in the case of a varargs function.
3617  AFI->setArgRegsSaveSize(0);
3618 
3619  // Calculate the amount of stack space that we need to allocate to store
3620  // byval and variadic arguments that are passed in registers.
3621  // We need to know this before we allocate the first byval or variadic
3622  // argument, as they will be allocated a stack slot below the CFA (Canonical
3623  // Frame Address, the stack pointer at entry to the function).
3624  unsigned ArgRegBegin = ARM::R4;
3625  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3626  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3627  break;
3628 
3629  CCValAssign &VA = ArgLocs[i];
3630  unsigned Index = VA.getValNo();
3631  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3632  if (!Flags.isByVal())
3633  continue;
3634 
3635  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3636  unsigned RBegin, REnd;
3637  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3638  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3639 
3640  CCInfo.nextInRegsParam();
3641  }
3642  CCInfo.rewindByValRegsInfo();
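 // Rewind the byval records so the main argument loop below can walk them
 // again from the beginning.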
3643 
3644  int lastInsIndex = -1;
3645  if (isVarArg && MFI.hasVAStart()) {
3646  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3647  if (RegIdx != array_lengthof(GPRArgRegs))
3648  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3649  }
3650 
3651  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3652  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
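 // For example, if the first register that must be saved is r2, this reserves
 // 4 * (r4 - r2) = 8 bytes for r2 and r3.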
3653  auto PtrVT = getPointerTy(DAG.getDataLayout());
3654 
3655  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3656  CCValAssign &VA = ArgLocs[i];
3657  if (Ins[VA.getValNo()].isOrigArg()) {
3658  std::advance(CurOrigArg,
3659  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3660  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3661  }
3662  // Arguments stored in registers.
3663  if (VA.isRegLoc()) {
3664  EVT RegVT = VA.getLocVT();
3665 
3666  if (VA.needsCustom()) {
3667  // f64 and vector types are split up into multiple registers or
3668  // combinations of registers and stack slots.
3669  if (VA.getLocVT() == MVT::v2f64) {
3670  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3671  Chain, DAG, dl);
3672  VA = ArgLocs[++i]; // skip ahead to next loc
3673  SDValue ArgValue2;
3674  if (VA.isMemLoc()) {
3675  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3676  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3677  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3678  MachinePointerInfo::getFixedStack(
3679  DAG.getMachineFunction(), FI));
3680  } else {
3681  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3682  Chain, DAG, dl);
3683  }
3684  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3685  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3686  ArgValue, ArgValue1,
3687  DAG.getIntPtrConstant(0, dl));
3688  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3689  ArgValue, ArgValue2,
3690  DAG.getIntPtrConstant(1, dl));
3691  } else
3692  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3693 
3694  } else {
3695  const TargetRegisterClass *RC;
3696 
3697  if (RegVT == MVT::f32)
3698  RC = &ARM::SPRRegClass;
3699  else if (RegVT == MVT::f64)
3700  RC = &ARM::DPRRegClass;
3701  else if (RegVT == MVT::v2f64)
3702  RC = &ARM::QPRRegClass;
3703  else if (RegVT == MVT::i32)
3704  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3705  : &ARM::GPRRegClass;
3706  else
3707  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3708 
3709  // Transform the arguments in physical registers into virtual ones.
3710  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3711  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3712  }
3713 
3714  // If this is an 8 or 16-bit value, it is really passed promoted
3715  // to 32 bits. Insert an assert[sz]ext to capture this, then
3716  // truncate to the right size.
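 // For example, an i8 argument zero-extended by the caller arrives in the low
 // byte of an i32 register; AssertZext records that the upper 24 bits are
 // zero before the value is truncated back to i8.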
3717  switch (VA.getLocInfo()) {
3718  default: llvm_unreachable("Unknown loc info!");
3719  case CCValAssign::Full: break;
3720  case CCValAssign::BCvt:
3721  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3722  break;
3723  case CCValAssign::SExt:
3724  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3725  DAG.getValueType(VA.getValVT()));
3726  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3727  break;
3728  case CCValAssign::ZExt:
3729  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3730  DAG.getValueType(VA.getValVT()));
3731  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3732  break;
3733  }
3734 
3735  InVals.push_back(ArgValue);
3736 
3737  } else { // VA.isRegLoc()
3738  // sanity check
3739  assert(VA.isMemLoc());
3740  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3741 
3742  int index = VA.getValNo();
3743 
3744  // Some Ins[] entries become multiple ArgLoc[] entries.
3745  // Process them only once.
3746  if (index != lastInsIndex)
3747  {
3748  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3749  // FIXME: For now, all byval parameter objects are marked mutable.
3750  // This can be changed with more analysis.
3751  // In case of tail call optimization, mark all arguments mutable,
3752  // since they could be overwritten by the lowering of arguments in case of
3753  // a tail call.
3754  if (Flags.isByVal()) {
3755  assert(Ins[index].isOrigArg() &&
3756  "Byval arguments cannot be implicit");
3757  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3758 
3759  int FrameIndex = StoreByValRegs(
3760  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3761  VA.getLocMemOffset(), Flags.getByValSize());
3762  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3763  CCInfo.nextInRegsParam();
3764  } else {
3765  unsigned FIOffset = VA.getLocMemOffset();
3766  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3767  FIOffset, true);
3768 
3769  // Create load nodes to retrieve arguments from the stack.
3770  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3771  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3772  MachinePointerInfo::getFixedStack(
3773  DAG.getMachineFunction(), FI)));
3774  }
3775  lastInsIndex = index;
3776  }
3777  }
3778  }
3779 
3780  // varargs
3781  if (isVarArg && MFI.hasVAStart())
3782  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3783  CCInfo.getNextStackOffset(),
3784  TotalArgRegsSaveSize);
3785 
3786  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3787 
3788  return Chain;
3789 }
3790 
3791 /// isFloatingPointZero - Return true if this is +0.0.
3792 static bool isFloatingPointZero(SDValue Op) {
3793  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3794  return CFP->getValueAPF().isPosZero();
3795  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3796  // Maybe this has already been legalized into the constant pool?
3797  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3798  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3799  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3800  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3801  return CFP->getValueAPF().isPosZero();
3802  }
3803  } else if (Op->getOpcode() == ISD::BITCAST &&
3804  Op->getValueType(0) == MVT::f64) {
3805  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3806  // created by LowerConstantFP().
3807  SDValue BitcastOp = Op->getOperand(0);
3808  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3809  isNullConstant(BitcastOp->getOperand(0)))
3810  return true;
3811  }
3812  return false;
3813 }
3814 
3815 /// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
3816 /// the given operands.
3817 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3818  SDValue &ARMcc, SelectionDAG &DAG,
3819  const SDLoc &dl) const {
3820  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3821  unsigned C = RHSC->getZExtValue();
3822  if (!isLegalICmpImmediate(C)) {
3823  // Constant does not fit, try adjusting it by one?
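 // For example, SETLT with a constant C that is not encodable can become
 // SETLE with C-1 when C-1 is encodable (and similarly for the other cases
 // below).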
3824  switch (CC) {
3825  default: break;
3826  case ISD::SETLT:
3827  case ISD::SETGE:
3828  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3829  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3830  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3831  }
3832  break;
3833  case ISD::SETULT:
3834  case ISD::SETUGE:
3835  if (C != 0 && isLegalICmpImmediate(C-1)) {
3836  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3837  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3838  }
3839  break;
3840  case ISD::SETLE:
3841  case ISD::SETGT:
3842  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3843  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3844  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3845  }
3846  break;
3847  case ISD::SETULE:
3848  case ISD::SETUGT:
3849  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3850  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3851  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3852  }
3853  break;
3854  }
3855  }
3856  }
3857 
3858  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3859  ARMISD::NodeType CompareType;
3860  switch (CondCode) {
3861  default:
3862  CompareType = ARMISD::CMP;
3863  break;
3864  case ARMCC::EQ:
3865  case ARMCC::NE:
3866  // Uses only Z Flag
3867  CompareType = ARMISD::CMPZ;
3868  break;
3869  }
3870  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3871  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3872 }
3873 
3874 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3875 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3876  SelectionDAG &DAG, const SDLoc &dl,
3877  bool InvalidOnQNaN) const {
3878  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3879  SDValue Cmp;
3880  SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3881  if (!isFloatingPointZero(RHS))
3882  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3883  else
3884  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3885  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3886 }
3887 
3888 /// duplicateCmp - Glue values can have only one use, so this function
3889 /// duplicates a comparison node.
3890 SDValue
3891 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3892  unsigned Opc = Cmp.getOpcode();
3893  SDLoc DL(Cmp);
3894  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3895  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3896 
3897  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3898  Cmp = Cmp.getOperand(0);
3899  Opc = Cmp.getOpcode();
3900  if (Opc == ARMISD::CMPFP)
3901  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3902  Cmp.getOperand(1), Cmp.getOperand(2));
3903  else {
3904  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3905  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3906  Cmp.getOperand(1));
3907  }
3908  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3909 }
3910 
3911 std::pair<SDValue, SDValue>
3912 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3913  SDValue &ARMcc) const {
3914  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3915 
3916  SDValue Value, OverflowCmp;
3917  SDValue LHS = Op.getOperand(0);
3918  SDValue RHS = Op.getOperand(1);
3919  SDLoc dl(Op);
3920 
3921  // FIXME: We are currently always generating CMPs because we don't support
3922  // generating CMN through the backend. This is not as good as the natural
3923  // CMP case because it causes a register dependency and cannot be folded
3924  // later.
3925 
3926  switch (Op.getOpcode()) {
3927  default:
3928  llvm_unreachable("Unknown overflow instruction!");
3929  case ISD::SADDO:
3930  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3931  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3932  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3933  break;
3934  case ISD::UADDO:
3935  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3936  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3937  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3938  break;
3939  case ISD::SSUBO:
3940  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3941  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3942  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3943  break;
3944  case ISD::USUBO:
3945  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3946  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3947  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3948  break;
3949  } // switch (...)
3950 
3951  return std::make_pair(Value, OverflowCmp);
3952 }
3953 
3954 SDValue
3955 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3956  // Let legalize expand this if it isn't a legal type yet.
3957  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3958  return SDValue();
3959 
3960  SDValue Value, OverflowCmp;
3961  SDValue ARMcc;
3962  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3963  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3964  SDLoc dl(Op);
3965  // We use 0 and 1 as false and true values.
3966  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3967  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3968  EVT VT = Op.getValueType();
3969 
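 // Materialize the overflow flag as an i32 value that is 1 on overflow and 0
 // otherwise, using a conditional move on the comparison computed above.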
3970  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3971  ARMcc, CCR, OverflowCmp);
3972 
3973  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3974  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3975 }
3976 
3977 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3978  SDValue Cond = Op.getOperand(0);
3979  SDValue SelectTrue = Op.getOperand(1);
3980  SDValue SelectFalse = Op.getOperand(2);
3981  SDLoc dl(Op);
3982  unsigned Opc = Cond.getOpcode();
3983 
3984  if (Cond.getResNo() == 1 &&
3985  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3986  Opc == ISD::USUBO)) {
3987  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3988  return SDValue();
3989 
3990  SDValue Value, OverflowCmp;
3991  SDValue ARMcc;
3992  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3993  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3994  EVT VT = Op.getValueType();
3995 
3996  return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3997  OverflowCmp, DAG);
3998  }
3999 
4000  // Convert:
4001  //
4002  // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4003  // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4004  //
4005  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4006  const ConstantSDNode *CMOVTrue =
4007  dyn_cast<ConstantSDNode>(Cond.getOperand(0));
4008  const ConstantSDNode *CMOVFalse =
4009  dyn_cast<ConstantSDNode>(Cond.getOperand(1));
4010 
4011  if (CMOVTrue && CMOVFalse) {
4012  unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4013  unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4014 
4015  SDValue True;
4016  SDValue False;
4017  if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4018  True = SelectTrue;
4019  False = SelectFalse;
4020  } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4021  True = SelectFalse;
4022  False = SelectTrue;
4023  }
4024 
4025  if (True.getNode() && False.getNode()) {
4026  EVT VT = Op.getValueType();
4027  SDValue ARMcc = Cond.getOperand(2);
4028  SDValue CCR = Cond.getOperand(3);
4029  SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4030  assert(True.getValueType() == VT);
4031  return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4032  }
4033  }
4034  }
4035 
4036  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4037  // undefined bits before doing a full-word comparison with zero.
4038  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4039  DAG.getConstant(1, dl, Cond.getValueType()));
4040 
4041  return DAG.getSelectCC(dl, Cond,
4042  DAG.getConstant(0, dl, Cond.getValueType()),
4043  SelectTrue, SelectFalse, ISD::SETNE);
4044 }
4045 
4046 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4047  bool &swpCmpOps, bool &swpVselOps) {
4048  // Start by selecting the GE condition code for opcodes that return true for
4049  // 'equality'
4050  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4051  CC == ISD::SETULE)
4052  CondCode = ARMCC::GE;
4053 
4054  // and GT for opcodes that return false for 'equality'.
4055  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4056  CC == ISD::SETULT)
4057  CondCode = ARMCC::GT;
4058 
4059  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4060  // to swap the compare operands.
4061  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4062  CC == ISD::SETULT)
4063  swpCmpOps = true;
4064 
4065  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4066  // If we have an unordered opcode, we need to swap the operands to the VSEL
4067  // instruction (effectively negating the condition).
4068  //
4069  // This also has the effect of swapping which one of 'less' or 'greater'
4070  // returns true, so we also swap the compare operands. It also switches
4071  // whether we return true for 'equality', so we compensate by picking the
4072  // opposite condition code to our original choice.
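 // Worked example: SETULE first picks GE and marks the compare operands for
 // swapping; being unordered, the rule below then toggles that swap off again,
 // swaps the VSEL operands instead, and flips GE to GT.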
4073  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4074  CC == ISD::SETUGT) {
4075  swpCmpOps = !swpCmpOps;
4076  swpVselOps = !swpVselOps;
4077  CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4078  }
4079 
4080  // 'ordered' is 'anything but unordered', so use the VS condition code and
4081  // swap the VSEL operands.
4082  if (CC == ISD::SETO) {
4083  CondCode = ARMCC::VS;
4084  swpVselOps = true;
4085  }
4086 
4087  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4088  // code and swap the VSEL operands.
4089  if (CC == ISD::SETUNE) {
4090  CondCode = ARMCC::EQ;
4091  swpVselOps = true;
4092  }
4093 }
4094 
4095 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4096  SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4097  SDValue Cmp, SelectionDAG &DAG) const {
4098  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4099  FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4100  DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4101  TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4102  DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4103 
4104