1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that ARM uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARMISelLowering.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMMachineFunctionInfo.h"
21 #include "ARMPerfectShuffle.h"
22 #include "ARMRegisterInfo.h"
23 #include "ARMSelectionDAGInfo.h"
24 #include "ARMSubtarget.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
61 #include "llvm/IR/Attributes.h"
62 #include "llvm/IR/CallingConv.h"
63 #include "llvm/IR/Constant.h"
64 #include "llvm/IR/Constants.h"
65 #include "llvm/IR/DataLayout.h"
66 #include "llvm/IR/DebugLoc.h"
67 #include "llvm/IR/DerivedTypes.h"
68 #include "llvm/IR/Function.h"
69 #include "llvm/IR/GlobalAlias.h"
70 #include "llvm/IR/GlobalValue.h"
71 #include "llvm/IR/GlobalVariable.h"
72 #include "llvm/IR/IRBuilder.h"
73 #include "llvm/IR/InlineAsm.h"
74 #include "llvm/IR/Instruction.h"
75 #include "llvm/IR/Instructions.h"
76 #include "llvm/IR/IntrinsicInst.h"
77 #include "llvm/IR/Intrinsics.h"
78 #include "llvm/IR/Module.h"
79 #include "llvm/IR/Type.h"
80 #include "llvm/IR/User.h"
81 #include "llvm/IR/Value.h"
82 #include "llvm/MC/MCInstrDesc.h"
84 #include "llvm/MC/MCRegisterInfo.h"
85 #include "llvm/MC/MCSchedule.h"
88 #include "llvm/Support/Casting.h"
89 #include "llvm/Support/CodeGen.h"
91 #include "llvm/Support/Compiler.h"
92 #include "llvm/Support/Debug.h"
94 #include "llvm/Support/KnownBits.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 
117 #define DEBUG_TYPE "arm-isel"
118 
119 STATISTIC(NumTailCalls, "Number of tail calls");
120 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
121 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
122 STATISTIC(NumConstpoolPromoted,
123  "Number of constants with their storage promoted into constant pools");
124 
125 static cl::opt<bool>
126 ARMInterworking("arm-interworking", cl::Hidden,
127  cl::desc("Enable / disable ARM interworking (for debugging only)"),
128  cl::init(true));
129 
131  "arm-promote-constant", cl::Hidden,
132  cl::desc("Enable / disable promotion of unnamed_addr constants into "
133  "constant pools"),
134  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
136  "arm-promote-constant-max-size", cl::Hidden,
137  cl::desc("Maximum size of constant to promote into a constant pool"),
138  cl::init(64));
140  "arm-promote-constant-max-total", cl::Hidden,
141  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
142  cl::init(128));
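// Promoting a small unnamed_addr constant into the function's constant pool
// lets it be reached with a single PC-relative load instead of materializing
// its address with a movw/movt pair or a separate literal-pool entry.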
143 
144 // The APCS parameter registers.
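// Under both APCS and AAPCS the first four word-sized arguments are passed in
// r0-r3; anything beyond that goes on the stack.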
145 static const MCPhysReg GPRArgRegs[] = {
146  ARM::R0, ARM::R1, ARM::R2, ARM::R3
147 };
148 
149 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
150  MVT PromotedBitwiseVT) {
151  if (VT != PromotedLdStVT) {
152  setOperationAction(ISD::LOAD, VT, Promote);
153  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
154 
155  setOperationAction(ISD::STORE, VT, Promote);
156  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
157  }
158 
159  MVT ElemTy = VT.getVectorElementType();
160  if (ElemTy != MVT::f64)
164  if (ElemTy == MVT::i32) {
169  } else {
174  }
183  if (VT.isInteger()) {
187  }
188 
189  // Promote all bit-wise operations.
190  if (VT.isInteger() && VT != PromotedBitwiseVT) {
191  setOperationAction(ISD::AND, VT, Promote);
192  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
193  setOperationAction(ISD::OR, VT, Promote);
194  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
195  setOperationAction(ISD::XOR, VT, Promote);
196  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
197  }
198 
199  // Neon does not support vector divide/remainder operations.
206 
207  if (!VT.isFloatingPoint() &&
208  VT != MVT::v2i64 && VT != MVT::v1i64)
209  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210  setOperationAction(Opcode, VT, Legal);
211 }
212 
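// NEON D registers are 64 bits wide and Q registers are 128 bits wide; a Q
// register is modelled as a pair of consecutive D registers, which is why the
// quad-register vector types below are added to the DPair register class.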
213 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
214  addRegisterClass(VT, &ARM::DPRRegClass);
215  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
216 }
217 
218 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
219  addRegisterClass(VT, &ARM::DPairRegClass);
220  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
221 }
222 
223 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
224  const ARMSubtarget &STI)
225  : TargetLowering(TM), Subtarget(&STI) {
226  RegInfo = Subtarget->getRegisterInfo();
227  Itins = Subtarget->getInstrItineraryData();
228 
231 
232  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
233  !Subtarget->isTargetWatchOS()) {
234  const auto &E = Subtarget->getTargetTriple().getEnvironment();
235 
236  bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
237  E == Triple::MuslEABIHF;
238  // Windows is a special case. Technically, we will replace all of the "GNU"
239  // calls with calls to MSVCRT if appropriate and adjust the calling
240  // convention then.
241  IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
242 
243  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
244  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
245  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
246  : CallingConv::ARM_AAPCS);
247  }
248 
249  if (Subtarget->isTargetMachO()) {
250  // Uses VFP for Thumb libfuncs if available.
251  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
252  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
253  static const struct {
254  const RTLIB::Libcall Op;
255  const char * const Name;
256  const ISD::CondCode Cond;
257  } LibraryCalls[] = {
258  // Single-precision floating-point arithmetic.
259  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
260  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
261  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
262  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
263 
264  // Double-precision floating-point arithmetic.
265  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
266  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
267  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
268  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
269 
270  // Single-precision comparisons.
271  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
272  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
273  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
274  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
275  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
276  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
277  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
278  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
279 
280  // Double-precision comparisons.
281  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
282  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
283  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
284  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
285  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
286  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
287  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
288  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
289 
290  // Floating-point to integer conversions.
291  // i64 conversions are done via library routines even when generating VFP
292  // instructions, so use the same ones.
293  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
294  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
295  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
296  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
297 
298  // Conversions between floating types.
299  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
300  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
301 
302  // Integer to floating-point conversions.
303  // i64 conversions are done via library routines even when generating VFP
304  // instructions, so use the same ones.
305  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
306  // e.g., __floatunsidf vs. __floatunssidfvfp.
307  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
308  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
309  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
310  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
311  };
312 
313  for (const auto &LC : LibraryCalls) {
314  setLibcallName(LC.Op, LC.Name);
315  if (LC.Cond != ISD::SETCC_INVALID)
316  setCmpLibcallCC(LC.Op, LC.Cond);
317  }
318  }
319 
320  // Set the correct calling convention for ARMv7k WatchOS. It's just
321  // AAPCS_VFP for functions as simple as libcalls.
322  if (Subtarget->isTargetWatchABI()) {
323  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
324  setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
325  }
326  }
327 
328  // These libcalls are not available in 32-bit.
329  setLibcallName(RTLIB::SHL_I128, nullptr);
330  setLibcallName(RTLIB::SRL_I128, nullptr);
331  setLibcallName(RTLIB::SRA_I128, nullptr);
332 
333  // RTLIB
334  if (Subtarget->isAAPCS_ABI() &&
335  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
336  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
337  static const struct {
338  const RTLIB::Libcall Op;
339  const char * const Name;
340  const CallingConv::ID CC;
341  const ISD::CondCode Cond;
342  } LibraryCalls[] = {
343  // Double-precision floating-point arithmetic helper functions
344  // RTABI chapter 4.1.2, Table 2
345  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
346  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
347  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
348  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
349 
350  // Double-precision floating-point comparison helper functions
351  // RTABI chapter 4.1.2, Table 3
352  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
353  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
354  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
355  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
356  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
357  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
358  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
359  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
360 
361  // Single-precision floating-point arithmetic helper functions
362  // RTABI chapter 4.1.2, Table 4
363  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
364  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
365  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
366  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
367 
368  // Single-precision floating-point comparison helper functions
369  // RTABI chapter 4.1.2, Table 5
370  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
371  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
372  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
373  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
374  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
375  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
376  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
377  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
378 
379  // Floating-point to integer conversions.
380  // RTABI chapter 4.1.2, Table 6
381  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
382  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
383  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
384  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389 
390  // Conversions between floating types.
391  // RTABI chapter 4.1.2, Table 7
392  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395 
396  // Integer to floating-point conversions.
397  // RTABI chapter 4.1.2, Table 8
398  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
399  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
400  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
401  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
402  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406 
407  // Long long helper functions
408  // RTABI chapter 4.2, Table 9
409  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
410  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413 
414  // Integer division functions
415  // RTABI chapter 4.3.1
416  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
417  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
418  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
419  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
420  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
421  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
422  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
423  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
424  };
425 
426  for (const auto &LC : LibraryCalls) {
427  setLibcallName(LC.Op, LC.Name);
428  setLibcallCallingConv(LC.Op, LC.CC);
429  if (LC.Cond != ISD::SETCC_INVALID)
430  setCmpLibcallCC(LC.Op, LC.Cond);
431  }
432 
433  // EABI dependent RTLIB
434  if (TM.Options.EABIVersion == EABI::EABI4 ||
435  TM.Options.EABIVersion == EABI::EABI5) {
436  static const struct {
437  const RTLIB::Libcall Op;
438  const char *const Name;
439  const CallingConv::ID CC;
440  const ISD::CondCode Cond;
441  } MemOpsLibraryCalls[] = {
442  // Memory operations
443  // RTABI chapter 4.3.4
444  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
445  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
446  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
447  };
448 
449  for (const auto &LC : MemOpsLibraryCalls) {
450  setLibcallName(LC.Op, LC.Name);
451  setLibcallCallingConv(LC.Op, LC.CC);
452  if (LC.Cond != ISD::SETCC_INVALID)
453  setCmpLibcallCC(LC.Op, LC.Cond);
454  }
455  }
456  }
457 
458  if (Subtarget->isTargetWindows()) {
459  static const struct {
460  const RTLIB::Libcall Op;
461  const char * const Name;
462  const CallingConv::ID CC;
463  } LibraryCalls[] = {
464  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
465  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
466  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
467  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
468  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
469  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
470  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
471  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
472  };
473 
474  for (const auto &LC : LibraryCalls) {
475  setLibcallName(LC.Op, LC.Name);
476  setLibcallCallingConv(LC.Op, LC.CC);
477  }
478  }
479 
480  // Use divmod compiler-rt calls for iOS 5.0 and later.
481  if (Subtarget->isTargetMachO() &&
482  !(Subtarget->isTargetIOS() &&
483  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
484  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
485  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
486  }
487 
488  // The half <-> float conversion functions are always soft-float on
489 // non-WatchOS platforms, but are needed for some targets which use a
490  // hard-float calling convention by default.
491  if (!Subtarget->isTargetWatchABI()) {
492  if (Subtarget->isAAPCS_ABI()) {
493  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
494  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
495  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
496  } else {
497  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
498  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
499  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
500  }
501  }
502 
503  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
504  // a __gnu_ prefix (which is the default).
505  if (Subtarget->isTargetAEABI()) {
506  static const struct {
507  const RTLIB::Libcall Op;
508  const char * const Name;
509  const CallingConv::ID CC;
510  } LibraryCalls[] = {
511  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
512  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
513  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
514  };
515 
516  for (const auto &LC : LibraryCalls) {
517  setLibcallName(LC.Op, LC.Name);
518  setLibcallCallingConv(LC.Op, LC.CC);
519  }
520  }
521 
522  if (Subtarget->isThumb1Only())
523  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
524  else
525  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
526 
527  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
528  !Subtarget->isThumb1Only()) {
529  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
530  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
531  }
532 
533  for (MVT VT : MVT::vector_valuetypes()) {
534  for (MVT InnerVT : MVT::vector_valuetypes()) {
535  setTruncStoreAction(VT, InnerVT, Expand);
536  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
537  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
538  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
539  }
540 
545 
547  }
548 
551 
554 
555  if (Subtarget->hasNEON()) {
556  addDRTypeForNEON(MVT::v2f32);
557  addDRTypeForNEON(MVT::v8i8);
558  addDRTypeForNEON(MVT::v4i16);
559  addDRTypeForNEON(MVT::v2i32);
560  addDRTypeForNEON(MVT::v1i64);
561 
562  addQRTypeForNEON(MVT::v4f32);
563  addQRTypeForNEON(MVT::v2f64);
564  addQRTypeForNEON(MVT::v16i8);
565  addQRTypeForNEON(MVT::v8i16);
566  addQRTypeForNEON(MVT::v4i32);
567  addQRTypeForNEON(MVT::v2i64);
568 
569  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
570  // neither Neon nor VFP support any arithmetic operations on it.
571  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
572  // supported for v4f32.
576  // FIXME: Code duplication: FDIV and FREM are expanded always, see
577  // ARMTargetLowering::addTypeForNEON method for details.
580  // FIXME: Create unittest.
581 // In other words, find a way when "copysign" appears in the DAG with vector
582  // operands.
584  // FIXME: Code duplication: SETCC has custom operation action, see
585  // ARMTargetLowering::addTypeForNEON method for details.
587  // FIXME: Create unittest for FNEG and for FABS.
599  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
606 
621 
622  // Mark v2f32 intrinsics.
637 
638  // Neon does not support some operations on v1i64 and v2i64 types.
640  // Custom handling for some quad-vector types to detect VMULL.
644  // Custom handling for some vector types to avoid expensive expansions
649  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
650 // a destination type that is wider than the source, nor does
651  // it have a FP_TO_[SU]INT instruction with a narrower destination than
652  // source.
657 
660 
661  // NEON does not have single instruction CTPOP for vectors with element
662  // types wider than 8-bits. However, custom lowering can leverage the
663  // v8i8/v16i8 vcnt instruction.
670 
673 
674  // NEON does not have single instruction CTTZ for vectors.
679 
684 
689 
694 
695  // NEON only has FMA instructions as of VFP4.
696  if (!Subtarget->hasVFP4()) {
699  }
700 
718 
719  // It is legal to extload from v4i8 to v4i16 or v4i32.
721  MVT::v2i32}) {
722  for (MVT VT : MVT::integer_vector_valuetypes()) {
726  }
727  }
728  }
729 
730  if (Subtarget->isFPOnlySP()) {
731  // When targeting a floating-point unit with only single-precision
732  // operations, f64 is legal for the few double-precision instructions which
733 // are present. However, no double-precision operations other than moves,
734  // loads and stores are provided by the hardware.
767  }
768 
770 
771  // ARM does not have floating-point extending loads.
772  for (MVT VT : MVT::fp_valuetypes()) {
775  }
776 
777  // ... or truncating stores
781 
782  // ARM does not have i1 sign extending load.
783  for (MVT VT : MVT::integer_valuetypes())
785 
786  // ARM supports all 4 flavors of integer indexed load / store.
787  if (!Subtarget->isThumb1Only()) {
788  for (unsigned im = (unsigned)ISD::PRE_INC;
798  }
799  } else {
800  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
803  }
804 
809 
812 
813  // i64 operation support.
816  if (Subtarget->isThumb1Only()) {
819  }
820  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
821  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
823 
830 
835 
836  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
838 
839  // ARM does not have ROTL.
841  for (MVT VT : MVT::vector_valuetypes()) {
844  }
847  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
849 
850  // @llvm.readcyclecounter requires the Performance Monitors extension.
851  // Default to the 0 expansion on unsupported platforms.
852  // FIXME: Technically there are older ARM CPUs that have
853  // implementation-specific ways of obtaining this information.
854  if (Subtarget->hasPerfMon())
856 
857  // Only ARMv6 has BSWAP.
858  if (!Subtarget->hasV6Ops())
860 
861  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
862  : Subtarget->hasDivideInARMMode();
863  if (!hasDivide) {
864  // These are expanded into libcalls if the cpu doesn't have HW divider.
867  }
868 
869  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
872 
875  }
876 
879 
880  // Register based DivRem for AEABI (RTABI 4.2)
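// Under the RTABI, __aeabi_idivmod / __aeabi_uidivmod return the quotient in
// r0 and the remainder in r1 (__aeabi_ldivmod / __aeabi_uldivmod use r0:r1
// and r2:r3), so a single call serves both the div and the rem of a pair.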
881  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
882  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
883  Subtarget->isTargetWindows()) {
886  HasStandaloneRem = false;
887 
888  if (Subtarget->isTargetWindows()) {
889  const struct {
890  const RTLIB::Libcall Op;
891  const char * const Name;
892  const CallingConv::ID CC;
893  } LibraryCalls[] = {
894  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
895  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
896  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
897  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
898 
899  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
900  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
901  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
902  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
903  };
904 
905  for (const auto &LC : LibraryCalls) {
906  setLibcallName(LC.Op, LC.Name);
907  setLibcallCallingConv(LC.Op, LC.CC);
908  }
909  } else {
910  const struct {
911  const RTLIB::Libcall Op;
912  const char * const Name;
913  const CallingConv::ID CC;
914  } LibraryCalls[] = {
915  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
916  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
917  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
918  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
919 
920  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
921  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
922  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
923  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
924  };
925 
926  for (const auto &LC : LibraryCalls) {
927  setLibcallName(LC.Op, LC.Name);
928  setLibcallCallingConv(LC.Op, LC.CC);
929  }
930  }
931 
936  } else {
939  }
940 
941  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
942  for (auto &VT : {MVT::f32, MVT::f64})
944 
949 
951 
952  // Use the default implementation.
959 
960  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
962  else
964 
965  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
966  // the default expansion.
967  InsertFencesForAtomic = false;
968  if (Subtarget->hasAnyDataBarrier() &&
969  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
970  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
971  // to ldrex/strex loops already.
973  if (!Subtarget->isThumb() || !Subtarget->isMClass())
975 
976  // On v8, we have particularly efficient implementations of atomic fences
977  // if they can be combined with nearby atomic loads and stores.
978  if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
979  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
980  InsertFencesForAtomic = true;
981  }
982  } else {
983  // If there's anything we can use as a barrier, go through custom lowering
984  // for ATOMIC_FENCE.
985  // If target has DMB in thumb, Fences can be inserted.
986  if (Subtarget->hasDataBarrier())
987  InsertFencesForAtomic = true;
988 
990  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
991 
992  // Set them all for expansion, which will force libcalls.
1005  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1006  // Unordered/Monotonic case.
1007  if (!InsertFencesForAtomic) {
1010  }
1011  }
1012 
1014 
1015  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1016  if (!Subtarget->hasV6Ops()) {
1019  }
1021 
1022  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1023  !Subtarget->isThumb1Only()) {
1024  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1025  // iff target supports vfp2.
1028  }
1029 
1030  // We want to custom lower some of our intrinsics.
1035  if (Subtarget->useSjLjEH())
1036  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1037 
1047 
1048  // Thumb-1 cannot currently select ARMISD::SUBE.
1049  if (!Subtarget->isThumb1Only())
1051 
1057 
1058  // We don't support sin/cos/fmod/copysign/pow
1067  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1068  !Subtarget->isThumb1Only()) {
1071  }
1074 
1075  if (!Subtarget->hasVFP4()) {
1078  }
1079 
1080  // Various VFP goodness
1081  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1082  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1083  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1086  }
1087 
1088  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1089  if (!Subtarget->hasFP16()) {
1092  }
1093  }
1094 
1095  // Combine sin / cos into one node or libcall if possible.
1096  if (Subtarget->hasSinCos()) {
1097  setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1098  setLibcallName(RTLIB::SINCOS_F64, "sincos");
1099  if (Subtarget->isTargetWatchABI()) {
1102  }
1103  if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1104 // For iOS, we don't want the normal expansion of a libcall to
1105  // sincos. We want to issue a libcall to __sincos_stret.
1108  }
1109  }
1110 
1111  // FP-ARMv8 implements a lot of rounding-like FP operations.
1112  if (Subtarget->hasFPARMv8()) {
1125 
1126  if (!Subtarget->isFPOnlySP()) {
1135  }
1136  }
1137 
1138  if (Subtarget->hasNEON()) {
1139  // vmin and vmax aren't available in a scalar form, so we use
1140  // a NEON instruction with an undef lane instead.
1147  }
1148 
1149  // We have target-specific dag combine patterns for the following nodes:
1150  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1157 
1158  if (Subtarget->hasV6Ops())
1160 
1162 
1163  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1164  !Subtarget->hasVFP2())
1166  else
1168 
1169  //// temporary - rewrite interface to use type
1170  MaxStoresPerMemset = 8;
1172  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1174  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1176 
1177  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1178  // are at least 4 bytes aligned.
1180 
1181  // Prefer likely predicted branches to selects on out-of-order cores.
1182  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1183 
1184  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1185 }
1186 
1188  return Subtarget->useSoftFloat();
1189 }
1190 
1191 // FIXME: It might make sense to define the representative register class as the
1192 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1193 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1194 // SPR's representative would be DPR_VFP2. This should work well if register
1195 // pressure tracking were modified such that a register use would increment the
1196 // pressure of the register class's representative and all of its super
1197 // classes' representatives transitively. We have not implemented this because
1198 // of the difficulty prior to coalescing of modeling operand register classes
1199 // due to the common occurrence of cross class copies and subregister insertions
1200 // and extractions.
1201 std::pair<const TargetRegisterClass *, uint8_t>
1202 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1203  MVT VT) const {
1204  const TargetRegisterClass *RRC = nullptr;
1205  uint8_t Cost = 1;
1206  switch (VT.SimpleTy) {
1207  default:
1208  return TargetLowering::findRepresentativeClass(TRI, VT);
1209  // Use DPR as representative register class for all floating point
1210  // and vector types. Since there are 32 SPR registers and 32 DPR registers so
1211  // the cost is 1 for both f32 and f64.
1212  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1213  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1214  RRC = &ARM::DPRRegClass;
1215  // When NEON is used for SP, only half of the register file is available
1216  // because operations that define both SP and DP results will be constrained
1217  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1218  // coalescing by double-counting the SP regs. See the FIXME above.
1219  if (Subtarget->useNEONForSinglePrecisionFP())
1220  Cost = 2;
1221  break;
1222  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1223  case MVT::v4f32: case MVT::v2f64:
1224  RRC = &ARM::DPRRegClass;
1225  Cost = 2;
1226  break;
1227  case MVT::v4i64:
1228  RRC = &ARM::DPRRegClass;
1229  Cost = 4;
1230  break;
1231  case MVT::v8i64:
1232  RRC = &ARM::DPRRegClass;
1233  Cost = 8;
1234  break;
1235  }
1236  return std::make_pair(RRC, Cost);
1237 }
1238 
1239 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1240  switch ((ARMISD::NodeType)Opcode) {
1241  case ARMISD::FIRST_NUMBER: break;
1242  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1243  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1244  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1245  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1246  case ARMISD::CALL: return "ARMISD::CALL";
1247  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1248  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1249  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1250  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1251  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1252  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1253  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1254  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1255  case ARMISD::CMP: return "ARMISD::CMP";
1256  case ARMISD::CMN: return "ARMISD::CMN";
1257  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1258  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1259  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1260  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1261  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1262 
1263  case ARMISD::CMOV: return "ARMISD::CMOV";
1264 
1265  case ARMISD::SSAT: return "ARMISD::SSAT";
1266 
1267  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1268  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1269  case ARMISD::RRX: return "ARMISD::RRX";
1270 
1271  case ARMISD::ADDC: return "ARMISD::ADDC";
1272  case ARMISD::ADDE: return "ARMISD::ADDE";
1273  case ARMISD::SUBC: return "ARMISD::SUBC";
1274  case ARMISD::SUBE: return "ARMISD::SUBE";
1275 
1276  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1277  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1278 
1279  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1280  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1281  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1282 
1283  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1284 
1285  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1286 
1287  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1288 
1289  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1290 
1291  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1292 
1293  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1294  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1295 
1296  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1297  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1298  case ARMISD::VCGE: return "ARMISD::VCGE";
1299  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1300  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1301  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1302  case ARMISD::VCGT: return "ARMISD::VCGT";
1303  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1304  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1305  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1306  case ARMISD::VTST: return "ARMISD::VTST";
1307 
1308  case ARMISD::VSHL: return "ARMISD::VSHL";
1309  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1310  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1311  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1312  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1313  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1314  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1315  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1316  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1317  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1318  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1319  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1320  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1321  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1322  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1323  case ARMISD::VSLI: return "ARMISD::VSLI";
1324  case ARMISD::VSRI: return "ARMISD::VSRI";
1325  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1326  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1327  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1328  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1329  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1330  case ARMISD::VDUP: return "ARMISD::VDUP";
1331  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1332  case ARMISD::VEXT: return "ARMISD::VEXT";
1333  case ARMISD::VREV64: return "ARMISD::VREV64";
1334  case ARMISD::VREV32: return "ARMISD::VREV32";
1335  case ARMISD::VREV16: return "ARMISD::VREV16";
1336  case ARMISD::VZIP: return "ARMISD::VZIP";
1337  case ARMISD::VUZP: return "ARMISD::VUZP";
1338  case ARMISD::VTRN: return "ARMISD::VTRN";
1339  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1340  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1341  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1342  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1343  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1344  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1345  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1346  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1347  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1348  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1349  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1350  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1351  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1352  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1353  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1354  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1355  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1356  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1357  case ARMISD::BFI: return "ARMISD::BFI";
1358  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1359  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1360  case ARMISD::VBSL: return "ARMISD::VBSL";
1361  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1362  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1363  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1364  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1365  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1366  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1367  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1368  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1369  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1370  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1371  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1372  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1373  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1374  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1375  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1376  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1377  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1378  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1379  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1380  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1381  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1382  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1383  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1384  }
1385  return nullptr;
1386 }
1387 
1388 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1389  EVT VT) const {
1390  if (!VT.isVector())
1391  return getPointerTy(DL);
1392  return VT.changeVectorElementTypeToInteger();
1393 }
1394 
1395 /// getRegClassFor - Return the register class that should be used for the
1396 /// specified value type.
1397 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1398  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1399  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1400  // load / store 4 to 8 consecutive D registers.
1401  if (Subtarget->hasNEON()) {
1402  if (VT == MVT::v4i64)
1403  return &ARM::QQPRRegClass;
1404  if (VT == MVT::v8i64)
1405  return &ARM::QQQQPRRegClass;
1406  }
1407  return TargetLowering::getRegClassFor(VT);
1408 }
1409 
1410 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1411 // source/dest is aligned and the copy size is large enough. We therefore want
1412 // to align such objects passed to memory intrinsics.
1413 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1414  unsigned &PrefAlign) const {
1415  if (!isa<MemIntrinsic>(CI))
1416  return false;
1417  MinSize = 8;
1418  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1419  // cycle faster than 4-byte aligned LDM.
1420  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1421  return true;
1422 }
1423 
1424 // Create a fast isel object.
1425 FastISel *
1426 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1427  const TargetLibraryInfo *libInfo) const {
1428  return ARM::createFastISel(funcInfo, libInfo);
1429 }
1430 
1431 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1432  unsigned NumVals = N->getNumValues();
1433  if (!NumVals)
1434  return Sched::RegPressure;
1435 
1436  for (unsigned i = 0; i != NumVals; ++i) {
1437  EVT VT = N->getValueType(i);
1438  if (VT == MVT::Glue || VT == MVT::Other)
1439  continue;
1440  if (VT.isFloatingPoint() || VT.isVector())
1441  return Sched::ILP;
1442  }
1443 
1444  if (!N->isMachineOpcode())
1445  return Sched::RegPressure;
1446 
1447 // Loads are scheduled for latency even if the instruction itinerary
1448  // is not available.
1449  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1450  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1451 
1452  if (MCID.getNumDefs() == 0)
1453  return Sched::RegPressure;
1454  if (!Itins->isEmpty() &&
1455  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1456  return Sched::ILP;
1457 
1458  return Sched::RegPressure;
1459 }
1460 
1461 //===----------------------------------------------------------------------===//
1462 // Lowering Code
1463 //===----------------------------------------------------------------------===//
1464 
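// The following helpers recognize shifts by exactly 16 bits. They are used
// further below to spot operands that are really 16-bit values sitting in the
// top or bottom half of a 32-bit register (e.g. when forming the DSP halfword
// multiplies such as SMULWB/SMULWT).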
1465 static bool isSRL16(const SDValue &Op) {
1466  if (Op.getOpcode() != ISD::SRL)
1467  return false;
1468  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1469  return Const->getZExtValue() == 16;
1470  return false;
1471 }
1472 
1473 static bool isSRA16(const SDValue &Op) {
1474  if (Op.getOpcode() != ISD::SRA)
1475  return false;
1476  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1477  return Const->getZExtValue() == 16;
1478  return false;
1479 }
1480 
1481 static bool isSHL16(const SDValue &Op) {
1482  if (Op.getOpcode() != ISD::SHL)
1483  return false;
1484  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1485  return Const->getZExtValue() == 16;
1486  return false;
1487 }
1488 
1489 // Check for a signed 16-bit value. We special-case SRA because it keeps
1490 // things simpler when also looking for SRAs that aren't sign-extending a
1491 // smaller value. Without the check, we'd need to take extra care with
1492 // checking order for some operations.
1493 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1494  if (isSRA16(Op))
1495  return isSHL16(Op.getOperand(0));
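 // ComputeNumSignBits returns the number of leading bits that are copies of
 // the sign bit; 17 on an i32 means bits [31:15] are all identical, i.e. the
 // value is already the sign-extension of a 16-bit quantity.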
1496  return DAG.ComputeNumSignBits(Op) == 17;
1497 }
1498 
1499 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
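/// Unsigned comparisons map onto the flag-based ARM conditions HI/HS/LO/LS,
/// while the signed ones use GT/GE/LT/LE.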
1500 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1501  switch (CC) {
1502  default: llvm_unreachable("Unknown condition code!");
1503  case ISD::SETNE: return ARMCC::NE;
1504  case ISD::SETEQ: return ARMCC::EQ;
1505  case ISD::SETGT: return ARMCC::GT;
1506  case ISD::SETGE: return ARMCC::GE;
1507  case ISD::SETLT: return ARMCC::LT;
1508  case ISD::SETLE: return ARMCC::LE;
1509  case ISD::SETUGT: return ARMCC::HI;
1510  case ISD::SETUGE: return ARMCC::HS;
1511  case ISD::SETULT: return ARMCC::LO;
1512  case ISD::SETULE: return ARMCC::LS;
1513  }
1514 }
1515 
1516 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
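/// Some IEEE predicates (e.g. SETONE and SETUEQ) cannot be expressed as a
/// single ARM condition on the VFP flags, so a second condition is returned
/// through CondCode2 and the caller emits two conditional operations.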
1517 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1518  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1519  CondCode2 = ARMCC::AL;
1520  InvalidOnQNaN = true;
1521  switch (CC) {
1522  default: llvm_unreachable("Unknown FP condition!");
1523  case ISD::SETEQ:
1524  case ISD::SETOEQ:
1525  CondCode = ARMCC::EQ;
1526  InvalidOnQNaN = false;
1527  break;
1528  case ISD::SETGT:
1529  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1530  case ISD::SETGE:
1531  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1532  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1533  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1534  case ISD::SETONE:
1535  CondCode = ARMCC::MI;
1536  CondCode2 = ARMCC::GT;
1537  InvalidOnQNaN = false;
1538  break;
1539  case ISD::SETO: CondCode = ARMCC::VC; break;
1540  case ISD::SETUO: CondCode = ARMCC::VS; break;
1541  case ISD::SETUEQ:
1542  CondCode = ARMCC::EQ;
1543  CondCode2 = ARMCC::VS;
1544  InvalidOnQNaN = false;
1545  break;
1546  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1547  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1548  case ISD::SETLT:
1549  case ISD::SETULT: CondCode = ARMCC::LT; break;
1550  case ISD::SETLE:
1551  case ISD::SETULE: CondCode = ARMCC::LE; break;
1552  case ISD::SETNE:
1553  case ISD::SETUNE:
1554  CondCode = ARMCC::NE;
1555  InvalidOnQNaN = false;
1556  break;
1557  }
1558 }
1559 
1560 //===----------------------------------------------------------------------===//
1561 // Calling Convention Implementation
1562 //===----------------------------------------------------------------------===//
1563 
1564 #include "ARMGenCallingConv.inc"
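// ARMGenCallingConv.inc is generated by TableGen from ARMCallingConv.td and
// provides the CC_ARM_* / RetCC_ARM_* assignment functions used below.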
1565 
1566 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1567 /// account presence of floating point hardware and calling convention
1568 /// limitations, such as support for variadic functions.
1569 CallingConv::ID
1570 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1571  bool isVarArg) const {
1572  switch (CC) {
1573  default:
1574  report_fatal_error("Unsupported calling convention");
1576  case CallingConv::ARM_APCS:
1577  case CallingConv::GHC:
1578  return CC;
1582  case CallingConv::Swift:
1584  case CallingConv::C:
1585  if (!Subtarget->isAAPCS_ABI())
1586  return CallingConv::ARM_APCS;
1587  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1589  !isVarArg)
1591  else
1592  return CallingConv::ARM_AAPCS;
1593  case CallingConv::Fast:
1595  if (!Subtarget->isAAPCS_ABI()) {
1596  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1597  return CallingConv::Fast;
1598  return CallingConv::ARM_APCS;
1599  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1601  else
1602  return CallingConv::ARM_AAPCS;
1603  }
1604 }
1605 
1606 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1607  bool isVarArg) const {
1608  return CCAssignFnForNode(CC, false, isVarArg);
1609 }
1610 
1611 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1612  bool isVarArg) const {
1613  return CCAssignFnForNode(CC, true, isVarArg);
1614 }
1615 
1616 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1617 /// CallingConvention.
1618 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1619  bool Return,
1620  bool isVarArg) const {
1621  switch (getEffectiveCallingConv(CC, isVarArg)) {
1622  default:
1623  report_fatal_error("Unsupported calling convention");
1624  case CallingConv::ARM_APCS:
1625  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1627  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1629  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1630  case CallingConv::Fast:
1631  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1632  case CallingConv::GHC:
1633  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1635  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1636  }
1637 }
1638 
1639 /// LowerCallResult - Lower the result values of a call into the
1640 /// appropriate copies out of appropriate physical registers.
1641 SDValue ARMTargetLowering::LowerCallResult(
1642  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1643  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1644  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1645  SDValue ThisVal) const {
1646  // Assign locations to each value returned by this call.
1647  SmallVector<CCValAssign, 16> RVLocs;
1648  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1649  *DAG.getContext());
1650  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1651 
1652  // Copy all of the result registers out of their specified physreg.
1653  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1654  CCValAssign VA = RVLocs[i];
1655 
1656  // Pass 'this' value directly from the argument to return value, to avoid
1657  // reg unit interference
1658  if (i == 0 && isThisReturn) {
1659  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1660  "unexpected return calling convention register assignment");
1661  InVals.push_back(ThisVal);
1662  continue;
1663  }
1664 
1665  SDValue Val;
1666  if (VA.needsCustom()) {
1667  // Handle f64 or half of a v2f64.
1668  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1669  InFlag);
1670  Chain = Lo.getValue(1);
1671  InFlag = Lo.getValue(2);
1672  VA = RVLocs[++i]; // skip ahead to next loc
1673  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1674  InFlag);
1675  Chain = Hi.getValue(1);
1676  InFlag = Hi.getValue(2);
1677  if (!Subtarget->isLittle())
1678  std::swap (Lo, Hi);
1679  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1680 
1681  if (VA.getLocVT() == MVT::v2f64) {
1682  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1683  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1684  DAG.getConstant(0, dl, MVT::i32));
1685 
1686  VA = RVLocs[++i]; // skip ahead to next loc
1687  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1688  Chain = Lo.getValue(1);
1689  InFlag = Lo.getValue(2);
1690  VA = RVLocs[++i]; // skip ahead to next loc
1691  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1692  Chain = Hi.getValue(1);
1693  InFlag = Hi.getValue(2);
1694  if (!Subtarget->isLittle())
1695  std::swap (Lo, Hi);
1696  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1697  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1698  DAG.getConstant(1, dl, MVT::i32));
1699  }
1700  } else {
1701  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1702  InFlag);
1703  Chain = Val.getValue(1);
1704  InFlag = Val.getValue(2);
1705  }
1706 
1707  switch (VA.getLocInfo()) {
1708  default: llvm_unreachable("Unknown loc info!");
1709  case CCValAssign::Full: break;
1710  case CCValAssign::BCvt:
1711  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1712  break;
1713  }
1714 
1715  InVals.push_back(Val);
1716  }
1717 
1718  return Chain;
1719 }
1720 
1721 /// LowerMemOpCallTo - Store the argument to the stack.
1722 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1723  SDValue Arg, const SDLoc &dl,
1724  SelectionDAG &DAG,
1725  const CCValAssign &VA,
1726  ISD::ArgFlagsTy Flags) const {
1727  unsigned LocMemOffset = VA.getLocMemOffset();
1728  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1729  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1730  StackPtr, PtrOff);
1731  return DAG.getStore(
1732  Chain, dl, Arg, PtrOff,
1733  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1734 }
1735 
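// PassF64ArgInRegs splits an f64 (or one half of a v2f64) into two i32s with
// VMOVRRD so it can be passed in a GPR pair under the soft-float ABI; the
// second half spills to the stack if no register is left, and the two halves
// are swapped on big-endian targets.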
1736 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1737  SDValue Chain, SDValue &Arg,
1738  RegsToPassVector &RegsToPass,
1739  CCValAssign &VA, CCValAssign &NextVA,
1740  SDValue &StackPtr,
1741  SmallVectorImpl<SDValue> &MemOpChains,
1742  ISD::ArgFlagsTy Flags) const {
1743  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1744  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1745  unsigned id = Subtarget->isLittle() ? 0 : 1;
1746  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1747 
1748  if (NextVA.isRegLoc())
1749  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1750  else {
1751  assert(NextVA.isMemLoc());
1752  if (!StackPtr.getNode())
1753  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1754  getPointerTy(DAG.getDataLayout()));
1755 
1756  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1757  dl, DAG, NextVA,
1758  Flags));
1759  }
1760 }
1761 
1762 /// LowerCall - Lowering a call into a callseq_start <-
1763 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1764 /// nodes.
1765 SDValue
1766 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1767  SmallVectorImpl<SDValue> &InVals) const {
1768  SelectionDAG &DAG = CLI.DAG;
1769  SDLoc &dl = CLI.DL;
1770  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1771  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1772  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1773  SDValue Chain = CLI.Chain;
1774  SDValue Callee = CLI.Callee;
1775  bool &isTailCall = CLI.IsTailCall;
1776  CallingConv::ID CallConv = CLI.CallConv;
1777  bool doesNotRet = CLI.DoesNotReturn;
1778  bool isVarArg = CLI.IsVarArg;
1779 
1780  MachineFunction &MF = DAG.getMachineFunction();
1781  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1782  bool isThisReturn = false;
1783  bool isSibCall = false;
1784  auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1785 
1786  // Disable tail calls if they're not supported.
1787  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1788  isTailCall = false;
1789 
1790  if (isTailCall) {
1791  // Check if it's really possible to do a tail call.
1792  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1793  isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1794  Outs, OutVals, Ins, DAG);
1795  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1796  report_fatal_error("failed to perform tail call elimination on a call "
1797  "site marked musttail");
1798  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1799  // detected sibcalls.
1800  if (isTailCall) {
1801  ++NumTailCalls;
1802  isSibCall = true;
1803  }
1804  }
1805 
1806  // Analyze operands of the call, assigning locations to each operand.
1807  SmallVector<CCValAssign, 16> ArgLocs;
1808  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1809  *DAG.getContext());
1810  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1811 
1812  // Get a count of how many bytes are to be pushed on the stack.
1813  unsigned NumBytes = CCInfo.getNextStackOffset();
1814 
1815  // For tail calls, memory operands are available in our caller's stack.
1816  if (isSibCall)
1817  NumBytes = 0;
1818 
1819  // Adjust the stack pointer for the new arguments...
1820  // These operations are automatically eliminated by the prolog/epilog pass
1821  if (!isSibCall)
1822  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1823 
1824  SDValue StackPtr =
1825  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1826 
1827  RegsToPassVector RegsToPass;
1828  SmallVector<SDValue, 8> MemOpChains;
1829 
1830  // Walk the register/memloc assignments, inserting copies/loads. In the case
1831  // of tail call optimization, arguments are handled later.
1832  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1833  i != e;
1834  ++i, ++realArgIdx) {
1835  CCValAssign &VA = ArgLocs[i];
1836  SDValue Arg = OutVals[realArgIdx];
1837  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1838  bool isByVal = Flags.isByVal();
1839 
1840  // Promote the value if needed.
1841  switch (VA.getLocInfo()) {
1842  default: llvm_unreachable("Unknown loc info!");
1843  case CCValAssign::Full: break;
1844  case CCValAssign::SExt:
1845  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1846  break;
1847  case CCValAssign::ZExt:
1848  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1849  break;
1850  case CCValAssign::AExt:
1851  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1852  break;
1853  case CCValAssign::BCvt:
1854  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1855  break;
1856  }
1857 
1858  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1859  if (VA.needsCustom()) {
1860  if (VA.getLocVT() == MVT::v2f64) {
1861  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1862  DAG.getConstant(0, dl, MVT::i32));
1863  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1864  DAG.getConstant(1, dl, MVT::i32));
1865 
1866  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1867  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1868 
1869  VA = ArgLocs[++i]; // skip ahead to next loc
1870  if (VA.isRegLoc()) {
1871  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1872  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1873  } else {
1874  assert(VA.isMemLoc());
1875 
1876  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1877  dl, DAG, VA, Flags));
1878  }
1879  } else {
1880  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1881  StackPtr, MemOpChains, Flags);
1882  }
1883  } else if (VA.isRegLoc()) {
1884  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1885  Outs[0].VT == MVT::i32) {
1886  assert(VA.getLocVT() == MVT::i32 &&
1887  "unexpected calling convention register assignment");
1888  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1889  "unexpected use of 'returned'");
1890  isThisReturn = true;
1891  }
1892  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1893  } else if (isByVal) {
1894  assert(VA.isMemLoc());
1895  unsigned offset = 0;
1896 
1897  // True if this byval aggregate will be split between registers
1898  // and memory.
1899  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1900  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1901 
1902  if (CurByValIdx < ByValArgsCount) {
1903 
1904  unsigned RegBegin, RegEnd;
1905  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1906 
1907  EVT PtrVT =
1908  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1909  unsigned int i, j;
1910  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1911  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1912  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1913  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1914  MachinePointerInfo(),
1915  DAG.InferPtrAlignment(AddArg));
1916  MemOpChains.push_back(Load.getValue(1));
1917  RegsToPass.push_back(std::make_pair(j, Load));
1918  }
1919 
1920  // If the parameter size exceeds the register area, the "offset" value
1921  // helps us compute the stack slot for the remaining part correctly.
1922  offset = RegEnd - RegBegin;
1923 
1924  CCInfo.nextInRegsParam();
1925  }
1926 
1927  if (Flags.getByValSize() > 4*offset) {
1928  auto PtrVT = getPointerTy(DAG.getDataLayout());
1929  unsigned LocMemOffset = VA.getLocMemOffset();
1930  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1931  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1932  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1933  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1934  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1935  MVT::i32);
1936  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1937  MVT::i32);
1938 
1939  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1940  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1941  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1942  Ops));
1943  }
1944  } else if (!isSibCall) {
1945  assert(VA.isMemLoc());
1946 
1947  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1948  dl, DAG, VA, Flags));
1949  }
1950  }
1951 
1952  if (!MemOpChains.empty())
1953  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1954 
1955  // Build a sequence of copy-to-reg nodes chained together with token chain
1956  // and flag operands which copy the outgoing args into the appropriate regs.
1957  SDValue InFlag;
1958  // Tail call byval lowering might overwrite argument registers so in case of
1959  // tail call optimization the copies to registers are lowered later.
1960  if (!isTailCall)
1961  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1962  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1963  RegsToPass[i].second, InFlag);
1964  InFlag = Chain.getValue(1);
1965  }
1966 
1967  // For tail calls lower the arguments to the 'real' stack slot.
1968  if (isTailCall) {
1969  // Force all the incoming stack arguments to be loaded from the stack
1970  // before any new outgoing arguments are stored to the stack, because the
1971  // outgoing stack slots may alias the incoming argument stack slots, and
1972  // the alias isn't otherwise explicit. This is slightly more conservative
1973  // than necessary, because it means that each store effectively depends
1974  // on every argument instead of just those arguments it would clobber.
1975 
1976  // Do not flag preceding copytoreg stuff together with the following stuff.
1977  InFlag = SDValue();
1978  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1979  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1980  RegsToPass[i].second, InFlag);
1981  InFlag = Chain.getValue(1);
1982  }
1983  InFlag = SDValue();
1984  }
1985 
1986  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1987  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1988  // node so that legalize doesn't hack it.
1989  bool isDirect = false;
1990 
1991  const TargetMachine &TM = getTargetMachine();
1992  const Module *Mod = MF.getFunction()->getParent();
1993  const GlobalValue *GV = nullptr;
1994  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1995  GV = G->getGlobal();
1996  bool isStub =
1997  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1998 
1999  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2000  bool isLocalARMFunc = false;
2001  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2002  auto PtrVt = getPointerTy(DAG.getDataLayout());
2003 
2004  if (Subtarget->genLongCalls()) {
2005  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2006  "long-calls codegen is not position independent!");
2007  // Handle a global address or an external symbol. If it's not one of
2008  // those, the target's already in a register, so we don't need to do
2009  // anything extra.
2010  if (isa<GlobalAddressSDNode>(Callee)) {
2011  // Create a constant pool entry for the callee address
2012  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2013  ARMConstantPoolValue *CPV =
2014  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2015 
2016  // Get the address of the callee into a register
2017  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2018  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2019  Callee = DAG.getLoad(
2020  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2021  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2022  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2023  const char *Sym = S->getSymbol();
2024 
2025  // Create a constant pool entry for the callee address
2026  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2027  ARMConstantPoolValue *CPV =
2028  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2029  ARMPCLabelIndex, 0);
2030  // Get the address of the callee into a register
2031  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2032  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2033  Callee = DAG.getLoad(
2034  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2035  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2036  }
2037  } else if (isa<GlobalAddressSDNode>(Callee)) {
2038  // If we're optimizing for minimum size and the function is called three or
2039  // more times in this block, we can improve codesize by calling indirectly
2040  // as BLXr has a 16-bit encoding.
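  // Editorial illustration (not from the original comments): in Thumb mode a
  // "blx <reg>" has a 16-bit encoding while each "bl <imm>" is 32-bit, so once
  // the same callee appears three or more times in one block, materializing
  // its address once and branching through a register can be a net size win.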
2041  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2042  auto *BB = CLI.CS.getParent();
2043  bool PreferIndirect =
2044  Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
2045  count_if(GV->users(), [&BB](const User *U) {
2046  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2047  }) > 2;
2048 
2049  if (!PreferIndirect) {
2050  isDirect = true;
2051  bool isDef = GV->isStrongDefinitionForLinker();
2052 
2053  // ARM call to a local ARM function is predicable.
2054  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2055  // tBX takes a register source operand.
2056  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2057  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2058  Callee = DAG.getNode(
2059  ARMISD::WrapperPIC, dl, PtrVt,
2060  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2061  Callee = DAG.getLoad(
2062  PtrVt, dl, DAG.getEntryNode(), Callee,
2063  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2064  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2065  MachineMemOperand::MOInvariant);
2066  } else if (Subtarget->isTargetCOFF()) {
2067  assert(Subtarget->isTargetWindows() &&
2068  "Windows is the only supported COFF target");
2069  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2070  ? ARMII::MO_DLLIMPORT
2071  : ARMII::MO_NO_FLAG;
2072  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2073  TargetFlags);
2074  if (GV->hasDLLImportStorageClass())
2075  Callee =
2076  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2077  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2078  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2079  } else {
2080  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2081  }
2082  }
2083  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2084  isDirect = true;
2085  // tBX takes a register source operand.
2086  const char *Sym = S->getSymbol();
2087  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2088  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2089  ARMConstantPoolValue *CPV =
2090  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2091  ARMPCLabelIndex, 4);
2092  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2093  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2094  Callee = DAG.getLoad(
2095  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2096  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2097  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2098  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2099  } else {
2100  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2101  }
2102  }
2103 
2104  // FIXME: handle tail calls differently.
2105  unsigned CallOpc;
2106  if (Subtarget->isThumb()) {
2107  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2108  CallOpc = ARMISD::CALL_NOLINK;
2109  else
2110  CallOpc = ARMISD::CALL;
2111  } else {
2112  if (!isDirect && !Subtarget->hasV5TOps())
2113  CallOpc = ARMISD::CALL_NOLINK;
2114  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2115  // Emit regular call when code size is the priority
2116  !MF.getFunction()->optForMinSize())
2117  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2118  CallOpc = ARMISD::CALL_NOLINK;
2119  else
2120  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2121  }
2122 
2123  std::vector<SDValue> Ops;
2124  Ops.push_back(Chain);
2125  Ops.push_back(Callee);
2126 
2127  // Add argument registers to the end of the list so that they are known live
2128  // into the call.
2129  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2130  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2131  RegsToPass[i].second.getValueType()));
2132 
2133  // Add a register mask operand representing the call-preserved registers.
2134  if (!isTailCall) {
2135  const uint32_t *Mask;
2136  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2137  if (isThisReturn) {
2138  // For 'this' returns, use the R0-preserving mask if applicable
2139  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2140  if (!Mask) {
2141  // Set isThisReturn to false if the calling convention is not one that
2142  // allows 'returned' to be modeled in this way, so LowerCallResult does
2143  // not try to pass 'this' straight through
2144  isThisReturn = false;
2145  Mask = ARI->getCallPreservedMask(MF, CallConv);
2146  }
2147  } else
2148  Mask = ARI->getCallPreservedMask(MF, CallConv);
2149 
2150  assert(Mask && "Missing call preserved mask for calling convention");
2151  Ops.push_back(DAG.getRegisterMask(Mask));
2152  }
2153 
2154  if (InFlag.getNode())
2155  Ops.push_back(InFlag);
2156 
2157  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2158  if (isTailCall) {
2159  MF.getFrameInfo().setHasTailCall();
2160  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2161  }
2162 
2163  // Returns a chain and a flag for retval copy to use.
2164  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2165  InFlag = Chain.getValue(1);
2166 
2167  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2168  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2169  if (!Ins.empty())
2170  InFlag = Chain.getValue(1);
2171 
2172  // Handle result values, copying them out of physregs into vregs that we
2173  // return.
2174  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2175  InVals, isThisReturn,
2176  isThisReturn ? OutVals[0] : SDValue());
2177 }
2178 
2179 /// HandleByVal - Every parameter *after* a byval parameter is passed
2180 /// on the stack. Remember the next parameter register to allocate,
2181 /// and then confiscate the rest of the parameter registers to ensure
2182 /// this.
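/// Editorial illustration (hypothetical AAPCS call, not from the original
/// comments): a 12-byte byval argument arriving when r1 is the next free GPR
/// records RegBegin = r1, RegEnd = r4, confiscates r2 and r3, and leaves
/// Size = 0 because the whole aggregate now fits in registers.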
2183 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2184  unsigned Align) const {
2185  // Byval (as with any stack) slots are always at least 4 byte aligned.
2186  Align = std::max(Align, 4U);
2187 
2188  unsigned Reg = State->AllocateReg(GPRArgRegs);
2189  if (!Reg)
2190  return;
2191 
2192  unsigned AlignInRegs = Align / 4;
2193  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2194  for (unsigned i = 0; i < Waste; ++i)
2195  Reg = State->AllocateReg(GPRArgRegs);
2196 
2197  if (!Reg)
2198  return;
2199 
2200  unsigned Excess = 4 * (ARM::R4 - Reg);
2201 
2202  // Special case: NSAA != SP and the parameter size is greater than the size
2203  // of all the remaining GPR registers. In that case we can't split the
2204  // parameter, we must send it all to the stack. We must also set the NCRN to
2205  // R4, wasting all the remaining registers.
2206  const unsigned NSAAOffset = State->getNextStackOffset();
2207  if (NSAAOffset != 0 && Size > Excess) {
2208  while (State->AllocateReg(GPRArgRegs))
2209  ;
2210  return;
2211  }
2212 
2213  // The first register for the byval parameter is the first register that
2214  // wasn't allocated before this call, i.e. "reg".
2215  // If the parameter is small enough to be saved in the range [reg, r4),
2216  // the end (one past the last) register is reg + param-size-in-regs;
2217  // otherwise the parameter is split between registers and the stack and
2218  // the end register is r4.
2219  unsigned ByValRegBegin = Reg;
2220  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2221  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2222  // Note that the first register was already allocated at the beginning of
2223  // this function; allocate the remaining registers we need.
2224  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2225  State->AllocateReg(GPRArgRegs);
2226  // A byval parameter that is split between registers and memory needs its
2227  // size truncated here.
2228  // In the case where the entire structure fits in registers, we set the
2229  // size in memory to zero.
2230  Size = std::max<int>(Size - Excess, 0);
2231 }
2232 
2233 /// MatchingStackOffset - Return true if the given stack call argument is
2234 /// already available in the same position (relatively) of the caller's
2235 /// incoming argument stack.
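/// Editorial illustration (hypothetical, not from the original comments): if
/// the caller received an i32 at fixed frame offset 8 and forwards it
/// unchanged as the callee's stack argument at offset 8, the sibcall can
/// reuse that slot and no store needs to be emitted.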
2236 static
2237 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2238  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2239  const TargetInstrInfo *TII) {
2240  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2241  int FI = std::numeric_limits<int>::max();
2242  if (Arg.getOpcode() == ISD::CopyFromReg) {
2243  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2244  if (!TargetRegisterInfo::isVirtualRegister(VR))
2245  return false;
2246  MachineInstr *Def = MRI->getVRegDef(VR);
2247  if (!Def)
2248  return false;
2249  if (!Flags.isByVal()) {
2250  if (!TII->isLoadFromStackSlot(*Def, FI))
2251  return false;
2252  } else {
2253  return false;
2254  }
2255  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2256  if (Flags.isByVal())
2257  // ByVal argument is passed in as a pointer but it's now being
2258  // dereferenced. e.g.
2259  // define @foo(%struct.X* %A) {
2260  // tail call @bar(%struct.X* byval %A)
2261  // }
2262  return false;
2263  SDValue Ptr = Ld->getBasePtr();
2264  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2265  if (!FINode)
2266  return false;
2267  FI = FINode->getIndex();
2268  } else
2269  return false;
2270 
2271  assert(FI != std::numeric_limits<int>::max());
2272  if (!MFI.isFixedObjectIndex(FI))
2273  return false;
2274  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2275 }
2276 
2277 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2278 /// for tail call optimization. Targets which want to do tail call
2279 /// optimization should implement this function.
2280 bool
2281 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2282  CallingConv::ID CalleeCC,
2283  bool isVarArg,
2284  bool isCalleeStructRet,
2285  bool isCallerStructRet,
2286  const SmallVectorImpl<ISD::OutputArg> &Outs,
2287  const SmallVectorImpl<SDValue> &OutVals,
2288  const SmallVectorImpl<ISD::InputArg> &Ins,
2289  SelectionDAG& DAG) const {
2290  MachineFunction &MF = DAG.getMachineFunction();
2291  const Function *CallerF = MF.getFunction();
2292  CallingConv::ID CallerCC = CallerF->getCallingConv();
2293 
2294  assert(Subtarget->supportsTailCall());
2295 
2296  // Look for obvious safe cases to perform tail call optimization that do not
2297  // require ABI changes. This is what gcc calls sibcall.
2298 
2299  // Exception-handling functions need a special set of instructions to indicate
2300  // a return to the hardware. Tail-calling another function would probably
2301  // break this.
2302  if (CallerF->hasFnAttribute("interrupt"))
2303  return false;
2304 
2305  // Also avoid sibcall optimization if either caller or callee uses struct
2306  // return semantics.
2307  if (isCalleeStructRet || isCallerStructRet)
2308  return false;
2309 
2310  // Externally-defined functions with weak linkage should not be
2311  // tail-called on ARM when the OS does not support dynamic
2312  // pre-emption of symbols, as the AAELF spec requires normal calls
2313  // to undefined weak functions to be replaced with a NOP or jump to the
2314  // next instruction. The behaviour of branch instructions in this
2315  // situation (as used for tail calls) is implementation-defined, so we
2316  // cannot rely on the linker replacing the tail call with a return.
2317  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2318  const GlobalValue *GV = G->getGlobal();
2319  const Triple &TT = getTargetMachine().getTargetTriple();
2320  if (GV->hasExternalWeakLinkage() &&
2321  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2322  return false;
2323  }
2324 
2325  // Check that the call results are passed in the same way.
2326  LLVMContext &C = *DAG.getContext();
2327  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2328  CCAssignFnForReturn(CalleeCC, isVarArg),
2329  CCAssignFnForReturn(CallerCC, isVarArg)))
2330  return false;
2331  // The callee has to preserve all registers the caller needs to preserve.
2332  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2333  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2334  if (CalleeCC != CallerCC) {
2335  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2336  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2337  return false;
2338  }
2339 
2340  // If Caller's vararg or byval argument has been split between registers and
2341  // stack, do not perform tail call, since part of the argument is in caller's
2342  // local frame.
2343  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2344  if (AFI_Caller->getArgRegsSaveSize())
2345  return false;
2346 
2347  // If the callee takes no arguments then go on to check the results of the
2348  // call.
2349  if (!Outs.empty()) {
2350  // Check if stack adjustment is needed. For now, do not do this if any
2351  // argument is passed on the stack.
2352  SmallVector<CCValAssign, 16> ArgLocs;
2353  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2354  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2355  if (CCInfo.getNextStackOffset()) {
2356  // Check if the arguments are already laid out in the right way as
2357  // the caller's fixed stack objects.
2358  MachineFrameInfo &MFI = MF.getFrameInfo();
2359  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2360  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2361  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2362  i != e;
2363  ++i, ++realArgIdx) {
2364  CCValAssign &VA = ArgLocs[i];
2365  EVT RegVT = VA.getLocVT();
2366  SDValue Arg = OutVals[realArgIdx];
2367  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2368  if (VA.getLocInfo() == CCValAssign::Indirect)
2369  return false;
2370  if (VA.needsCustom()) {
2371  // f64 and vector types are split into multiple registers or
2372  // register/stack-slot combinations. The types will not match
2373  // the registers; give up on memory f64 refs until we figure
2374  // out what to do about this.
2375  if (!VA.isRegLoc())
2376  return false;
2377  if (!ArgLocs[++i].isRegLoc())
2378  return false;
2379  if (RegVT == MVT::v2f64) {
2380  if (!ArgLocs[++i].isRegLoc())
2381  return false;
2382  if (!ArgLocs[++i].isRegLoc())
2383  return false;
2384  }
2385  } else if (!VA.isRegLoc()) {
2386  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2387  MFI, MRI, TII))
2388  return false;
2389  }
2390  }
2391  }
2392 
2393  const MachineRegisterInfo &MRI = MF.getRegInfo();
2394  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2395  return false;
2396  }
2397 
2398  return true;
2399 }
2400 
2401 bool
2402 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2403  MachineFunction &MF, bool isVarArg,
2404  const SmallVectorImpl<ISD::OutputArg> &Outs,
2405  LLVMContext &Context) const {
2406  SmallVector<CCValAssign, 16> RVLocs;
2407  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2408  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2409 }
2410 
2411 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2412  const SDLoc &DL, SelectionDAG &DAG) {
2413  const MachineFunction &MF = DAG.getMachineFunction();
2414  const Function *F = MF.getFunction();
2415 
2416  StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2417 
2418  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2419  // version of the "preferred return address". These offsets affect the return
2420  // instruction if this is a return from PL1 without hypervisor extensions.
2421  // IRQ/FIQ: +4 "subs pc, lr, #4"
2422  // SWI: 0 "subs pc, lr, #0"
2423  // ABORT: +4 "subs pc, lr, #4"
2424  // UNDEF: +4/+2 "subs pc, lr, #0"
2425  // UNDEF varies depending on whether the exception came from ARM or Thumb
2426  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
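  // Editorial illustration (not from the original comments): for a function
  // marked with the "interrupt"="IRQ" attribute on a non-M-class core, the
  // epilogue therefore ends in "subs pc, lr, #4", which both corrects LR and
  // restores CPSR from SPSR on the exception return.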
2427 
2428  int64_t LROffset;
2429  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2430  IntKind == "ABORT")
2431  LROffset = 4;
2432  else if (IntKind == "SWI" || IntKind == "UNDEF")
2433  LROffset = 0;
2434  else
2435  report_fatal_error("Unsupported interrupt attribute. If present, value "
2436  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2437 
2438  RetOps.insert(RetOps.begin() + 1,
2439  DAG.getConstant(LROffset, DL, MVT::i32, false));
2440 
2441  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2442 }
2443 
2444 SDValue
2445 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2446  bool isVarArg,
2447  const SmallVectorImpl<ISD::OutputArg> &Outs,
2448  const SmallVectorImpl<SDValue> &OutVals,
2449  const SDLoc &dl, SelectionDAG &DAG) const {
2450  // CCValAssign - represent the assignment of the return value to a location.
2451  SmallVector<CCValAssign, 16> RVLocs;
2452 
2453  // CCState - Info about the registers and stack slots.
2454  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2455  *DAG.getContext());
2456 
2457  // Analyze outgoing return values.
2458  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2459 
2460  SDValue Flag;
2461  SmallVector<SDValue, 4> RetOps;
2462  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2463  bool isLittleEndian = Subtarget->isLittle();
2464 
2465  MachineFunction &MF = DAG.getMachineFunction();
2466  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2467  AFI->setReturnRegsCount(RVLocs.size());
2468 
2469  // Copy the result values into the output registers.
2470  for (unsigned i = 0, realRVLocIdx = 0;
2471  i != RVLocs.size();
2472  ++i, ++realRVLocIdx) {
2473  CCValAssign &VA = RVLocs[i];
2474  assert(VA.isRegLoc() && "Can only return in registers!");
2475 
2476  SDValue Arg = OutVals[realRVLocIdx];
2477 
2478  switch (VA.getLocInfo()) {
2479  default: llvm_unreachable("Unknown loc info!");
2480  case CCValAssign::Full: break;
2481  case CCValAssign::BCvt:
2482  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2483  break;
2484  }
2485 
2486  if (VA.needsCustom()) {
2487  if (VA.getLocVT() == MVT::v2f64) {
2488  // Extract the first half and return it in two registers.
2489  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2490  DAG.getConstant(0, dl, MVT::i32));
2491  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2492  DAG.getVTList(MVT::i32, MVT::i32), Half);
2493 
2494  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2495  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2496  Flag);
2497  Flag = Chain.getValue(1);
2498  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2499  VA = RVLocs[++i]; // skip ahead to next loc
2500  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2501  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2502  Flag);
2503  Flag = Chain.getValue(1);
2504  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2505  VA = RVLocs[++i]; // skip ahead to next loc
2506 
2507  // Extract the 2nd half and fall through to handle it as an f64 value.
2508  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2509  DAG.getConstant(1, dl, MVT::i32));
2510  }
2511  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2512  // available.
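  // Editorial illustration (not from the original comments): a soft-float ABI
  // "ret double" takes this path; VMOVRRD splits the f64 into two i32 halves,
  // which are copied into r0 and r1 (the halves are swapped on big-endian
  // targets via the isLittleEndian selects below).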
2513  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2514  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2515  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2516  fmrrd.getValue(isLittleEndian ? 0 : 1),
2517  Flag);
2518  Flag = Chain.getValue(1);
2519  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2520  VA = RVLocs[++i]; // skip ahead to next loc
2521  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2522  fmrrd.getValue(isLittleEndian ? 1 : 0),
2523  Flag);
2524  } else
2525  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2526 
2527  // Guarantee that all emitted copies are glued
2528  // together, so nothing can be scheduled in between them.
2529  Flag = Chain.getValue(1);
2530  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2531  }
2532  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2533  const MCPhysReg *I =
2534  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2535  if (I) {
2536  for (; *I; ++I) {
2537  if (ARM::GPRRegClass.contains(*I))
2538  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2539  else if (ARM::DPRRegClass.contains(*I))
2540  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2541  else
2542  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2543  }
2544  }
2545 
2546  // Update chain and glue.
2547  RetOps[0] = Chain;
2548  if (Flag.getNode())
2549  RetOps.push_back(Flag);
2550 
2551  // CPUs which aren't M-class use a special sequence to return from
2552  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2553  // though we use "subs pc, lr, #N").
2554  //
2555  // M-class CPUs actually use a normal return sequence with a special
2556  // (hardware-provided) value in LR, so the normal code path works.
2557  if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2558  !Subtarget->isMClass()) {
2559  if (Subtarget->isThumb1Only())
2560  report_fatal_error("interrupt attribute is not supported in Thumb1");
2561  return LowerInterruptReturn(RetOps, dl, DAG);
2562  }
2563 
2564  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2565 }
2566 
2567 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2568  if (N->getNumValues() != 1)
2569  return false;
2570  if (!N->hasNUsesOfValue(1, 0))
2571  return false;
2572 
2573  SDValue TCChain = Chain;
2574  SDNode *Copy = *N->use_begin();
2575  if (Copy->getOpcode() == ISD::CopyToReg) {
2576  // If the copy has a glue operand, we conservatively assume it isn't safe to
2577  // perform a tail call.
2578  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2579  return false;
2580  TCChain = Copy->getOperand(0);
2581  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2582  SDNode *VMov = Copy;
2583  // f64 returned in a pair of GPRs.
2584  SmallPtrSet<SDNode*, 2> Copies;
2585  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2586  UI != UE; ++UI) {
2587  if (UI->getOpcode() != ISD::CopyToReg)
2588  return false;
2589  Copies.insert(*UI);
2590  }
2591  if (Copies.size() > 2)
2592  return false;
2593 
2594  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2595  UI != UE; ++UI) {
2596  SDValue UseChain = UI->getOperand(0);
2597  if (Copies.count(UseChain.getNode()))
2598  // Second CopyToReg
2599  Copy = *UI;
2600  else {
2601  // We are at the top of this chain.
2602  // If the copy has a glue operand, we conservatively assume it
2603  // isn't safe to perform a tail call.
2604  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2605  return false;
2606  // First CopyToReg
2607  TCChain = UseChain;
2608  }
2609  }
2610  } else if (Copy->getOpcode() == ISD::BITCAST) {
2611  // f32 returned in a single GPR.
2612  if (!Copy->hasOneUse())
2613  return false;
2614  Copy = *Copy->use_begin();
2615  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2616  return false;
2617  // If the copy has a glue operand, we conservatively assume it isn't safe to
2618  // perform a tail call.
2619  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2620  return false;
2621  TCChain = Copy->getOperand(0);
2622  } else {
2623  return false;
2624  }
2625 
2626  bool HasRet = false;
2627  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2628  UI != UE; ++UI) {
2629  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2630  UI->getOpcode() != ARMISD::INTRET_FLAG)
2631  return false;
2632  HasRet = true;
2633  }
2634 
2635  if (!HasRet)
2636  return false;
2637 
2638  Chain = TCChain;
2639  return true;
2640 }
2641 
2642 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2643  if (!Subtarget->supportsTailCall())
2644  return false;
2645 
2646  auto Attr =
2647  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2648  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2649  return false;
2650 
2651  return true;
2652 }
2653 
2654 // Writing a 64-bit value requires splitting it into two 32-bit values first,
2655 // and passing the low and high parts through separately.
2656 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2657  SDLoc DL(Op);
2658  SDValue WriteValue = Op->getOperand(2);
2659 
2660  // This function is only supposed to be called for i64 type argument.
2661  assert(WriteValue.getValueType() == MVT::i64
2662  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2663 
2664  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2665  DAG.getConstant(0, DL, MVT::i32));
2666  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2667  DAG.getConstant(1, DL, MVT::i32));
2668  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2669  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2670 }
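// Editorial illustration (hypothetical IR, not from the original source):
//   call void @llvm.write_register.i64(metadata !"reg", i64 %v)
// is re-emitted here as a single ISD::WRITE_REGISTER node whose value
// operands are the two i32 halves Lo = %v[31:0] and Hi = %v[63:32].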
2671 
2672 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2673 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2674 // one of the above-mentioned nodes. It has to be wrapped because otherwise
2675 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2676 // be used to form addressing modes. These wrapped nodes will be selected
2677 // into MOVi.
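// Editorial illustration (not from the original comments): a plain
// GlobalAddress G becomes (ARMISD::Wrapper (TargetGlobalAddress G)), so
// instruction selection matches the wrapper while the bare target node stays
// available for folding into addressing modes.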
2678 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2679  SelectionDAG &DAG) const {
2680  EVT PtrVT = Op.getValueType();
2681  // FIXME there is no actual debug info here
2682  SDLoc dl(Op);
2683  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2684  SDValue Res;
2685 
2686  // When generating execute-only code Constant Pools must be promoted to the
2687  // global data section. It's a bit ugly that we can't share them across basic
2688  // blocks, but this way we guarantee that execute-only behaves correctly with
2689  // position-independent addressing modes.
2690  if (Subtarget->genExecuteOnly()) {
2691  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2692  auto T = const_cast<Type*>(CP->getType());
2693  auto C = const_cast<Constant*>(CP->getConstVal());
2694  auto M = const_cast<Module*>(DAG.getMachineFunction().
2695  getFunction()->getParent());
2696  auto GV = new GlobalVariable(
2697  *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2698  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2699  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2700  Twine(AFI->createPICLabelUId())
2701  );
2702  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2703  dl, PtrVT);
2704  return LowerGlobalAddress(GA, DAG);
2705  }
2706 
2707  if (CP->isMachineConstantPoolEntry())
2708  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2709  CP->getAlignment());
2710  else
2711  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2712  CP->getAlignment());
2713  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2714 }
2715 
2718 }
2719 
2720 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2721  SelectionDAG &DAG) const {
2722  MachineFunction &MF = DAG.getMachineFunction();
2723  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2724  unsigned ARMPCLabelIndex = 0;
2725  SDLoc DL(Op);
2726  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2727  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2728  SDValue CPAddr;
2729  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2730  if (!IsPositionIndependent) {
2731  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2732  } else {
2733  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2734  ARMPCLabelIndex = AFI->createPICLabelUId();
2735  ARMConstantPoolValue *CPV =
2736  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2737  ARMCP::CPBlockAddress, PCAdj);
2738  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2739  }
2740  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2741  SDValue Result = DAG.getLoad(
2742  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2743  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2744  if (!IsPositionIndependent)
2745  return Result;
2746  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2747  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2748 }
2749 
2750 /// \brief Convert a TLS address reference into the correct sequence of loads
2751 /// and calls to compute the variable's address for Darwin, and return an
2752 /// SDValue containing the final node.
2753 
2754 /// Darwin only has one TLS scheme which must be capable of dealing with the
2755 /// fully general situation, in the worst case. This means:
2756 /// + "extern __thread" declaration.
2757 /// + Defined in a possibly unknown dynamic library.
2758 ///
2759 /// The general system is that each __thread variable has a [3 x i32] descriptor
2760 /// which contains information used by the runtime to calculate the address. The
2761 /// only part of this the compiler needs to know about is the first word, which
2762 /// contains a function pointer that must be called with the address of the
2763 /// entire descriptor in "r0".
2764 ///
2765 /// Since this descriptor may be in a different unit, in general access must
2766 /// proceed along the usual ARM rules. A common sequence to produce is:
2767 ///
2768 /// movw rT1, :lower16:_var$non_lazy_ptr
2769 /// movt rT1, :upper16:_var$non_lazy_ptr
2770 /// ldr r0, [rT1]
2771 /// ldr rT2, [r0]
2772 /// blx rT2
2773 /// [...address now in r0...]
2774 SDValue
2775 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2776  SelectionDAG &DAG) const {
2777  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
2778  SDLoc DL(Op);
2779 
2780  // The first step is to get the address of the actual global symbol. This is
2781  // where the TLS descriptor lives.
2782  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2783 
2784  // The first entry in the descriptor is a function pointer that we must call
2785  // to obtain the address of the variable.
2786  SDValue Chain = DAG.getEntryNode();
2787  SDValue FuncTLVGet = DAG.getLoad(
2788  MVT::i32, DL, Chain, DescAddr,
2789  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2790  /* Alignment = */ 4,
2791  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2792  MachineMemOperand::MOInvariant);
2793  Chain = FuncTLVGet.getValue(1);
2794 
2795  MachineFunction &F = DAG.getMachineFunction();
2796  MachineFrameInfo &MFI = F.getFrameInfo();
2797  MFI.setAdjustsStack(true);
2798 
2799  // TLS calls preserve all registers except those that absolutely must be
2800  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2801  // silly).
2802  auto TRI =
2803  getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2804  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2805  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2806 
2807  // Finally, we can make the call. This is just a degenerate version of a
2808  // normal ARM call node: r0 takes the address of the descriptor, and
2809  // returns the address of the variable in this thread.
2810  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2811  Chain =
2812  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2813  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2814  DAG.getRegisterMask(Mask), Chain.getValue(1));
2815  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2816 }
2817 
2818 SDValue
2819 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2820  SelectionDAG &DAG) const {
2821  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2822 
2823  SDValue Chain = DAG.getEntryNode();
2824  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2825  SDLoc DL(Op);
2826 
2827  // Load the current TEB (thread environment block)
2828  SDValue Ops[] = {Chain,
2829  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2830  DAG.getConstant(15, DL, MVT::i32),
2831  DAG.getConstant(0, DL, MVT::i32),
2832  DAG.getConstant(13, DL, MVT::i32),
2833  DAG.getConstant(0, DL, MVT::i32),
2834  DAG.getConstant(2, DL, MVT::i32)};
2835  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2836  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2837 
2838  SDValue TEB = CurrentTEB.getValue(0);
2839  Chain = CurrentTEB.getValue(1);
2840 
2841  // Load the ThreadLocalStoragePointer from the TEB
2842  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2843  SDValue TLSArray =
2844  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2845  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2846 
2847  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2848  // offset into the TLSArray.
2849 
2850  // Load the TLS index from the C runtime
2851  SDValue TLSIndex =
2852  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2853  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2854  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2855 
2856  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2857  DAG.getConstant(2, DL, MVT::i32));
2858  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2859  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2860  MachinePointerInfo());
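  // Editorial illustration (hypothetical values, not from the original
  // comments): if _tls_index is 2, the load above reads the slot at
  // TLSArray + (2 << 2), i.e. the third entry, which points at this module's
  // TLS block for the current thread.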
2861 
2862  // Get the offset of the start of the .tls section (section base)
2863  const auto *GA = cast<GlobalAddressSDNode>(Op);
2864  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2865  SDValue Offset = DAG.getLoad(
2866  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2867  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2868  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2869 
2870  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2871 }
2872 
2873 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2874 SDValue
2875 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2876  SelectionDAG &DAG) const {
2877  SDLoc dl(GA);
2878  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2879  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2880  MachineFunction &MF = DAG.getMachineFunction();
2881  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2882  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2883  ARMConstantPoolValue *CPV =
2884  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2885  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2886  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2887  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2888  Argument = DAG.getLoad(
2889  PtrVT, dl, DAG.getEntryNode(), Argument,
2891  SDValue Chain = Argument.getValue(1);
2892 
2893  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2894  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2895 
2896  // call __tls_get_addr.
2897  ArgListTy Args;
2898  ArgListEntry Entry;
2899  Entry.Node = Argument;
2900  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2901  Args.push_back(Entry);
2902 
2903  // FIXME: is there useful debug info available here?
2904  TargetLowering::CallLoweringInfo CLI(DAG);
2905  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2906  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2907  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2908 
2909  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2910  return CallResult.first;
2911 }
2912 
2913 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2914 // "local exec" model.
2915 SDValue
2916 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2917  SelectionDAG &DAG,
2918  TLSModel::Model model) const {
2919  const GlobalValue *GV = GA->getGlobal();
2920  SDLoc dl(GA);
2921  SDValue Offset;
2922  SDValue Chain = DAG.getEntryNode();
2923  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2924  // Get the Thread Pointer
2925  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2926 
2927  if (model == TLSModel::InitialExec) {
2928  MachineFunction &MF = DAG.getMachineFunction();
2929  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2930  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2931  // Initial exec model.
2932  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2933  ARMConstantPoolValue *CPV =
2934  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2935  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2936  true);
2937  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2938  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2939  Offset = DAG.getLoad(
2940  PtrVT, dl, Chain, Offset,
2942  Chain = Offset.getValue(1);
2943 
2944  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2945  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2946 
2947  Offset = DAG.getLoad(
2948  PtrVT, dl, Chain, Offset,
2950  } else {
2951  // local exec model
2952  assert(model == TLSModel::LocalExec);
2953  ARMConstantPoolValue *CPV =
2954  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2955  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2956  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2957  Offset = DAG.getLoad(
2958  PtrVT, dl, Chain, Offset,
2960  }
2961 
2962  // The address of the thread local variable is the add of the thread
2963  // pointer with the offset of the variable.
2964  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2965 }
2966 
2967 SDValue
2968 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2969  if (Subtarget->isTargetDarwin())
2970  return LowerGlobalTLSAddressDarwin(Op, DAG);
2971 
2972  if (Subtarget->isTargetWindows())
2973  return LowerGlobalTLSAddressWindows(Op, DAG);
2974 
2975  // TODO: implement the "local dynamic" model
2976  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2977  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2978  if (DAG.getTarget().Options.EmulatedTLS)
2979  return LowerToTLSEmulatedModel(GA, DAG);
2980 
2981  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2982 
2983  switch (model) {
2984  case TLSModel::GeneralDynamic:
2985  case TLSModel::LocalDynamic:
2986  return LowerToTLSGeneralDynamicModel(GA, DAG);
2987  case TLSModel::InitialExec:
2988  case TLSModel::LocalExec:
2989  return LowerToTLSExecModels(GA, DAG, model);
2990  }
2991  llvm_unreachable("bogus TLS model");
2992 }
2993 
2994 /// Return true if all users of V are within function F, looking through
2995 /// ConstantExprs.
2996 static bool allUsersAreInFunction(const Value *V, const Function *F) {
2997  SmallVector<const User*,4> Worklist;
2998  for (auto *U : V->users())
2999  Worklist.push_back(U);
3000  while (!Worklist.empty()) {
3001  auto *U = Worklist.pop_back_val();
3002  if (isa<ConstantExpr>(U)) {
3003  for (auto *UU : U->users())
3004  Worklist.push_back(UU);
3005  continue;
3006  }
3007 
3008  auto *I = dyn_cast<Instruction>(U);
3009  if (!I || I->getParent()->getParent() != F)
3010  return false;
3011  }
3012  return true;
3013 }
3014 
3015 /// Return true if all users of V are within some (any) function, looking through
3016 /// ConstantExprs. In other words, return false if any user lies outside a
3017 /// function, e.g. in another global's initializer.
3017 static bool allUsersAreInFunctions(const Value *V) {
3018  SmallVector<const User*,4> Worklist;
3019  for (auto *U : V->users())
3020  Worklist.push_back(U);
3021  while (!Worklist.empty()) {
3022  auto *U = Worklist.pop_back_val();
3023  if (isa<ConstantExpr>(U)) {
3024  for (auto *UU : U->users())
3025  Worklist.push_back(UU);
3026  continue;
3027  }
3028 
3029  if (!isa<Instruction>(U))
3030  return false;
3031  }
3032  return true;
3033 }
3034 
3035 // Return true if T is an integer, float or an array/vector of either.
3036 static bool isSimpleType(Type *T) {
3037  if (T->isIntegerTy() || T->isFloatingPointTy())
3038  return true;
3039  Type *SubT = nullptr;
3040  if (T->isArrayTy())
3041  SubT = T->getArrayElementType();
3042  else if (T->isVectorTy())
3043  SubT = T->getVectorElementType();
3044  else
3045  return false;
3046  return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3047 }
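// Editorial examples (not from the original comments): i32, float, [8 x i16]
// and <4 x float> count as simple; pointers, structs and anything containing
// a pointer do not, which keeps relocations out of any inlined constant.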
3048 
3049 static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3050  EVT PtrVT, const SDLoc &dl) {
3051  // If we're creating a pool entry for a constant global with unnamed address,
3052  // and the global is small enough, we can emit it inline into the constant pool
3053  // to save ourselves an indirection.
3054  //
3055  // This is a win if the constant is only used in one function (so it doesn't
3056  // need to be duplicated) or duplicating the constant wouldn't increase code
3057  // size (implying the constant is no larger than 4 bytes).
3058  const Function *F = DAG.getMachineFunction().getFunction();
3059 
3060  // We rely on this decision to inline being idempotent and unrelated to the
3061  // use-site. We know that if we inline a variable at one use site, we'll
3062  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3063  // doesn't know about this optimization, so bail out if it's enabled;
3064  // otherwise we could decide to inline here (and thus never emit the GV)
3065  // while fast-isel generated code still requires the GV.
3066  if (!EnableConstpoolPromotion ||
3067  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3068  return SDValue();
3069 
3070  auto *GVar = dyn_cast<GlobalVariable>(GV);
3071  if (!GVar || !GVar->hasInitializer() ||
3072  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3073  !GVar->hasLocalLinkage())
3074  return SDValue();
3075 
3076  // Ensure that we don't try and inline any type that contains pointers. If
3077  // we inline a value that contains relocations, we move the relocations from
3078  // .data to .text which is not ideal.
3079  auto *Init = GVar->getInitializer();
3080  if (!isSimpleType(Init->getType()))
3081  return SDValue();
3082 
3083  // The constant islands pass can only really deal with alignment requests
3084  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3085  // any type wanting greater alignment requirements than 4 bytes. We also
3086  // can only promote constants that are multiples of 4 bytes in size or
3087  // are paddable to a multiple of 4. Currently we only try to pad constants
3088  // that are strings, for simplicity.
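  // Editorial illustration (not from the original comments): a 6-byte string
  // initializer gets RequiredPadding = 4 - (6 % 4) = 2 and is emitted as an
  // 8-byte, zero-padded entry; a 13-byte non-string aggregate is rejected
  // because only strings are padded here.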
3089  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3090  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3091  unsigned Align = GVar->getAlignment();
3092  unsigned RequiredPadding = 4 - (Size % 4);
3093  bool PaddingPossible =
3094  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3095  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3096  Size == 0)
3097  return SDValue();
3098 
3099  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3100  MachineFunction &MF = DAG.getMachineFunction();
3101  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3102 
3103  // We can't bloat the constant pool too much, else the ConstantIslands pass
3104  // may fail to converge. If we haven't promoted this global yet (it may have
3105  // multiple uses), and promoting it would increase the constant pool size (Sz
3106  // > 4), ensure we have space to do so up to MaxTotal.
3107  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3108  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3109  ConstpoolPromotionMaxTotal)
3110  return SDValue();
3111 
3112  // This is only valid if all users are in a single function OR it has users
3113  // in multiple functions but it is no larger than a pointer. We also check if
3114  // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3115  // address taken.
3116  if (!allUsersAreInFunction(GVar, F) &&
3117  !(Size <= 4 && allUsersAreInFunctions(GVar)))
3118  return SDValue();
3119 
3120  // We're going to inline this global. Pad it out if needed.
3121  if (RequiredPadding != 4) {
3122  StringRef S = CDAInit->getAsString();
3123 
3124  SmallVector<uint8_t,16> V(S.size());
3125  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3126  while (RequiredPadding--)
3127  V.push_back(0);
3128  Init = ConstantDataArray::get(*DAG.getContext(), V);
3129  }
3130 
3131  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3132  SDValue CPAddr =
3133  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3134  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3135  AFI->markGlobalAsPromotedToConstantPool(GVar);
3136  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3137  PaddedSize - 4);
3138  }
3139  ++NumConstpoolPromoted;
3140  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3141 }
3142 
3143 static bool isReadOnly(const GlobalValue *GV) {
3144  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3145  GV = GA->getBaseObject();
3146  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3147  isa<Function>(GV);
3148 }
3149 
3150 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3151  SelectionDAG &DAG) const {
3152  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3153  default: llvm_unreachable("unknown object format");
3154  case Triple::COFF:
3155  return LowerGlobalAddressWindows(Op, DAG);
3156  case Triple::ELF:
3157  return LowerGlobalAddressELF(Op, DAG);
3158  case Triple::MachO:
3159  return LowerGlobalAddressDarwin(Op, DAG);
3160  }
3161 }
3162 
3163 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3164  SelectionDAG &DAG) const {
3165  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3166  SDLoc dl(Op);
3167  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3168  const TargetMachine &TM = getTargetMachine();
3169  bool IsRO = isReadOnly(GV);
3170 
3171  // promoteToConstantPool only if not generating XO text section
3172  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3173  if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3174  return V;
3175 
3176  if (isPositionIndependent()) {
3177  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3178 
3179  MachineFunction &MF = DAG.getMachineFunction();
3180  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3181  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3182  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3183  SDLoc dl(Op);
3184  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3185  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
3186  GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
3187  UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
3188  /*AddCurrentAddress=*/UseGOT_PREL);
3189  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3190  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3191  SDValue Result = DAG.getLoad(
3192  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3193  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3194  SDValue Chain = Result.getValue(1);
3195  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3196  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3197  if (UseGOT_PREL)
3198  Result =
3199  DAG.getLoad(PtrVT, dl, Chain, Result,
3200  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3201  return Result;
3202  } else if (Subtarget->isROPI() && IsRO) {
3203  // PC-relative.
3204  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3205  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3206  return Result;
3207  } else if (Subtarget->isRWPI() && !IsRO) {
3208  // SB-relative.
3209  SDValue RelAddr;
3210  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3211  ++NumMovwMovt;
3212  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3213  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3214  } else { // use literal pool for address constant
3215  ARMConstantPoolValue *CPV =
3216  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3217  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3218  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3219  RelAddr = DAG.getLoad(
3220  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3221  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3222  }
3223  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3224  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3225  return Result;
3226  }
3227 
3228  // If we have T2 ops, we can materialize the address directly via movt/movw
3229  // pair. This is always cheaper.
3230  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3231  ++NumMovwMovt;
3232  // FIXME: Once remat is capable of dealing with instructions with register
3233  // operands, expand this into two nodes.
3234  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3235  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3236  } else {
3237  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3238  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3239  return DAG.getLoad(
3240  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3241  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3242  }
3243 }
3244 
3245 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3246  SelectionDAG &DAG) const {
3247  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3248  "ROPI/RWPI not currently supported for Darwin");
3249  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3250  SDLoc dl(Op);
3251  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3252 
3253  if (Subtarget->useMovt(DAG.getMachineFunction()))
3254  ++NumMovwMovt;
3255 
3256  // FIXME: Once remat is capable of dealing with instructions with register
3257  // operands, expand this into multiple nodes
3258  unsigned Wrapper =
3259  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3260 
3261  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3262  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3263 
3264  if (Subtarget->isGVIndirectSymbol(GV))
3265  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3266  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3267  return Result;
3268 }
3269 
3270 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3271  SelectionDAG &DAG) const {
3272  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3273  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3274  "Windows on ARM expects to use movw/movt");
3275  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3276  "ROPI/RWPI not currently supported for Windows");
3277 
3278  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3279  const ARMII::TOF TargetFlags =
3280  (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3281  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3282  SDValue Result;
3283  SDLoc DL(Op);
3284 
3285  ++NumMovwMovt;
3286 
3287  // FIXME: Once remat is capable of dealing with instructions with register
3288  // operands, expand this into two nodes.
3289  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3290  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3291  TargetFlags));
3292  if (GV->hasDLLImportStorageClass())
3293  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3294  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3295  return Result;
3296 }
3297 
3298 SDValue
3299 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3300  SDLoc dl(Op);
3301  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3302  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3303  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3304  Op.getOperand(1), Val);
3305 }
3306 
3307 SDValue
3308 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3309  SDLoc dl(Op);
3310  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3311  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3312 }
3313 
3314 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3315  SelectionDAG &DAG) const {
3316  SDLoc dl(Op);
3317  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3318  Op.getOperand(0));
3319 }
3320 
3321 SDValue
3322 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3323  const ARMSubtarget *Subtarget) const {
3324  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3325  SDLoc dl(Op);
3326  switch (IntNo) {
3327  default: return SDValue(); // Don't custom lower most intrinsics.
3328  case Intrinsic::thread_pointer: {
3329  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3330  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3331  }
3332  case Intrinsic::eh_sjlj_lsda: {
3333  MachineFunction &MF = DAG.getMachineFunction();
3334  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3335  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3336  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3337  SDValue CPAddr;
3338  bool IsPositionIndependent = isPositionIndependent();
3339  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3340  ARMConstantPoolValue *CPV =
3341  ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3342  ARMCP::CPLSDA, PCAdj);
3343  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3344  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3345  SDValue Result = DAG.getLoad(
3346  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3347  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3348 
3349  if (IsPositionIndependent) {
3350  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3351  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3352  }
3353  return Result;
3354  }
3355  case Intrinsic::arm_neon_vabs:
3356  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3357  Op.getOperand(1));
3358  case Intrinsic::arm_neon_vmulls:
3359  case Intrinsic::arm_neon_vmullu: {
3360  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3361  ? ARMISD::VMULLs : ARMISD::VMULLu;
3362  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3363  Op.getOperand(1), Op.getOperand(2));
3364  }
3365  case Intrinsic::arm_neon_vminnm:
3366  case Intrinsic::arm_neon_vmaxnm: {
3367  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3368  ? ISD::FMINNUM : ISD::FMAXNUM;
3369  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3370  Op.getOperand(1), Op.getOperand(2));
3371  }
3372  case Intrinsic::arm_neon_vminu:
3373  case Intrinsic::arm_neon_vmaxu: {
3374  if (Op.getValueType().isFloatingPoint())
3375  return SDValue();
3376  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3377  ? ISD::UMIN : ISD::UMAX;
3378  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3379  Op.getOperand(1), Op.getOperand(2));
3380  }
3381  case Intrinsic::arm_neon_vmins:
3382  case Intrinsic::arm_neon_vmaxs: {
3383  // v{min,max}s is overloaded between signed integers and floats.
3384  if (!Op.getValueType().isFloatingPoint()) {
3385  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3386  ? ISD::SMIN : ISD::SMAX;
3387  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3388  Op.getOperand(1), Op.getOperand(2));
3389  }
3390  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3391  ? ISD::FMINNAN : ISD::FMAXNAN;
3392  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3393  Op.getOperand(1), Op.getOperand(2));
3394  }
3395  case Intrinsic::arm_neon_vtbl1:
3396  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3397  Op.getOperand(1), Op.getOperand(2));
3398  case Intrinsic::arm_neon_vtbl2:
3399  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3400  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3401  }
3402 }
3403 
3404 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3405  const ARMSubtarget *Subtarget) {
3406  SDLoc dl(Op);
3407  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3408  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3409  if (SSID == SyncScope::SingleThread)
3410  return Op;
3411 
3412  if (!Subtarget->hasDataBarrier()) {
3413  // Some ARMv6 CPUs can support data barriers with an mcr instruction.
3414  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3415  // here.
3416  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3417  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3418  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3419  DAG.getConstant(0, dl, MVT::i32));
3420  }
3421 
3422  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3423  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3424  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3425  if (Subtarget->isMClass()) {
3426  // Only a full system barrier exists in the M-class architectures.
3427  Domain = ARM_MB::SY;
3428  } else if (Subtarget->preferISHSTBarriers() &&
3429  Ord == AtomicOrdering::Release) {
3430  // Swift happens to implement ISHST barriers in a way that's compatible with
3431  // Release semantics but weaker than ISH so we'd be fools not to use
3432  // it. Beware: other processors probably don't!
3433  Domain = ARM_MB::ISHST;
3434  }
3435 
3436  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3437  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3438  DAG.getConstant(Domain, dl, MVT::i32));
3439 }
3440 
3441 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3442  const ARMSubtarget *Subtarget) {
3443  // Pre-v5TE ARM and Thumb1 do not have preload instructions.
3444  if (!(Subtarget->isThumb2() ||
3445  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3446  // Just preserve the chain.
3447  return Op.getOperand(0);
3448 
3449  SDLoc dl(Op);
3450  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3451  if (!isRead &&
3452  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3453  // ARMv7 with MP extension has PLDW.
3454  return Op.getOperand(0);
3455 
3456  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3457  if (Subtarget->isThumb()) {
3458  // Invert the bits.
3459  isRead = ~isRead & 1;
3460  isData = ~isData & 1;
3461  }
3462 
3463  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3464  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3465  DAG.getConstant(isData, dl, MVT::i32));
3466 }
3467 
3468 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3469  MachineFunction &MF = DAG.getMachineFunction();
3470  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3471 
3472  // vastart just stores the address of the VarArgsFrameIndex slot into the
3473  // memory location argument.
3474  SDLoc dl(Op);
3475  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3476  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3477  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3478  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3479  MachinePointerInfo(SV));
3480 }
3481 
3482 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3483  CCValAssign &NextVA,
3484  SDValue &Root,
3485  SelectionDAG &DAG,
3486  const SDLoc &dl) const {
3487  MachineFunction &MF = DAG.getMachineFunction();
3488  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3489 
3490  const TargetRegisterClass *RC;
3491  if (AFI->isThumb1OnlyFunction())
3492  RC = &ARM::tGPRRegClass;
3493  else
3494  RC = &ARM::GPRRegClass;
3495 
3496  // Transform the arguments stored in physical registers into virtual ones.
3497  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3498  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3499 
3500  SDValue ArgValue2;
3501  if (NextVA.isMemLoc()) {
3502  MachineFrameInfo &MFI = MF.getFrameInfo();
3503  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3504 
3505  // Create load node to retrieve arguments from the stack.
3506  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3507  ArgValue2 = DAG.getLoad(
3508  MVT::i32, dl, Root, FIN,
3509  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3510  } else {
3511  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3512  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3513  }
3514  if (!Subtarget->isLittle())
3515  std::swap (ArgValue, ArgValue2);
3516  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3517 }
3518 
3519 // The remaining GPRs hold either the beginning of variable-argument
3520 // data, or the beginning of an aggregate passed by value (usually
3521 // byval). Either way, we allocate stack slots adjacent to the data
3522 // provided by our caller, and store the unallocated registers there.
3523 // If this is a variadic function, the va_list pointer will begin with
3524 // these values; otherwise, this reassembles a (byval) structure that
3525 // was split between registers and memory.
3526 // Return: The frame index that the registers were stored into.
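// For example, a byval aggregate split between r2-r3 and the stack has r2 and
// r3 stored into fixed slots directly below its caller-provided stack portion,
// so the whole object ends up contiguous in memory.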
3527 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3528  const SDLoc &dl, SDValue &Chain,
3529  const Value *OrigArg,
3530  unsigned InRegsParamRecordIdx,
3531  int ArgOffset, unsigned ArgSize) const {
3532  // Currently, two use cases are possible:
3533  // Case #1. Non-var-args function, and we meet the first byval parameter.
3534  // Set up the first unallocated register as the first byval register;
3535  // eat all remaining registers
3536  // (these two actions are performed by the HandleByVal method).
3537  // Then, here, we initialize the stack frame with
3538  // "store-reg" instructions.
3539  // Case #2. Var-args function that doesn't contain byval parameters.
3540  // The same: eat all remaining unallocated registers and
3541  // initialize the stack frame.
3542 
3543  MachineFunction &MF = DAG.getMachineFunction();
3544  MachineFrameInfo &MFI = MF.getFrameInfo();
3545  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3546  unsigned RBegin, REnd;
3547  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3548  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3549  } else {
3550  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3551  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3552  REnd = ARM::R4;
3553  }
3554 
3555  if (REnd != RBegin)
3556  ArgOffset = -4 * (ARM::R4 - RBegin);
3557 
3558  auto PtrVT = getPointerTy(DAG.getDataLayout());
3559  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3560  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3561 
3562  SmallVector<SDValue, 4> MemOps;
3563  const TargetRegisterClass *RC =
3564  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3565 
3566  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3567  unsigned VReg = MF.addLiveIn(Reg, RC);
3568  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3569  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3570  MachinePointerInfo(OrigArg, 4 * i));
3571  MemOps.push_back(Store);
3572  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3573  }
3574 
3575  if (!MemOps.empty())
3576  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3577  return FrameIndex;
3578 }
3579 
3580 // Set up the stack frame that the va_list pointer will start from.
3581 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3582  const SDLoc &dl, SDValue &Chain,
3583  unsigned ArgOffset,
3584  unsigned TotalArgRegsSaveSize,
3585  bool ForceMutable) const {
3586  MachineFunction &MF = DAG.getMachineFunction();
3587  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3588 
3589  // Try to store any remaining integer argument regs
3590  // to their spots on the stack so that they may be loaded by dereferencing
3591  // the result of va_next.
3592  // If there are no regs to be stored, just point the address after the last
3593  // argument passed via the stack.
3594  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3595  CCInfo.getInRegsParamsCount(),
3596  CCInfo.getNextStackOffset(), 4);
3597  AFI->setVarArgsFrameIndex(FrameIndex);
3598 }
3599 
3600 SDValue ARMTargetLowering::LowerFormalArguments(
3601  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3602  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3603  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3604  MachineFunction &MF = DAG.getMachineFunction();
3605  MachineFrameInfo &MFI = MF.getFrameInfo();
3606 
3607  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3608 
3609  // Assign locations to all of the incoming arguments.
3610  SmallVector<CCValAssign, 16> ArgLocs;
3611  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3612  *DAG.getContext());
3613  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3614 
3615  SmallVector<SDValue, 16> ArgValues;
3616  SDValue ArgValue;
3617  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3618  unsigned CurArgIdx = 0;
3619 
3620  // Initially ArgRegsSaveSize is zero.
3621  // Then we increase this value each time we meet a byval parameter.
3622  // We also increase this value in the case of a varargs function.
3623  AFI->setArgRegsSaveSize(0);
3624 
3625  // Calculate the amount of stack space that we need to allocate to store
3626  // byval and variadic arguments that are passed in registers.
3627  // We need to know this before we allocate the first byval or variadic
3628  // argument, as they will be allocated a stack slot below the CFA (Canonical
3629  // Frame Address, the stack pointer at entry to the function).
3630  unsigned ArgRegBegin = ARM::R4;
3631  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3632  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3633  break;
3634 
3635  CCValAssign &VA = ArgLocs[i];
3636  unsigned Index = VA.getValNo();
3637  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3638  if (!Flags.isByVal())
3639  continue;
3640 
3641  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3642  unsigned RBegin, REnd;
3643  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3644  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3645 
3646  CCInfo.nextInRegsParam();
3647  }
3648  CCInfo.rewindByValRegsInfo();
3649 
3650  int lastInsIndex = -1;
3651  if (isVarArg && MFI.hasVAStart()) {
3652  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3653  if (RegIdx != array_lengthof(GPRArgRegs))
3654  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3655  }
3656 
3657  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3658  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3659  auto PtrVT = getPointerTy(DAG.getDataLayout());
3660 
3661  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3662  CCValAssign &VA = ArgLocs[i];
3663  if (Ins[VA.getValNo()].isOrigArg()) {
3664  std::advance(CurOrigArg,
3665  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3666  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3667  }
3668  // Arguments stored in registers.
3669  if (VA.isRegLoc()) {
3670  EVT RegVT = VA.getLocVT();
3671 
3672  if (VA.needsCustom()) {
3673  // f64 and vector types are split up into multiple registers or
3674  // combinations of registers and stack slots.
3675  if (VA.getLocVT() == MVT::v2f64) {
3676  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3677  Chain, DAG, dl);
3678  VA = ArgLocs[++i]; // skip ahead to next loc
3679  SDValue ArgValue2;
3680  if (VA.isMemLoc()) {
3681  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3682  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3683  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3684  MachinePointerInfo::getFixedStack(
3685  DAG.getMachineFunction(), FI));
3686  } else {
3687  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3688  Chain, DAG, dl);
3689  }
3690  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3691  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3692  ArgValue, ArgValue1,
3693  DAG.getIntPtrConstant(0, dl));
3694  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3695  ArgValue, ArgValue2,
3696  DAG.getIntPtrConstant(1, dl));
3697  } else
3698  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3699  } else {
3700  const TargetRegisterClass *RC;
3701 
3702  if (RegVT == MVT::f32)
3703  RC = &ARM::SPRRegClass;
3704  else if (RegVT == MVT::f64)
3705  RC = &ARM::DPRRegClass;
3706  else if (RegVT == MVT::v2f64)
3707  RC = &ARM::QPRRegClass;
3708  else if (RegVT == MVT::i32)
3709  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3710  : &ARM::GPRRegClass;
3711  else
3712  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3713 
3714  // Transform the arguments in physical registers into virtual ones.
3715  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3716  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3717  }
3718 
3719  // If this is an 8 or 16-bit value, it is really passed promoted
3720  // to 32 bits. Insert an assert[sz]ext to capture this, then
3721  // truncate to the right size.
3722  switch (VA.getLocInfo()) {
3723  default: llvm_unreachable("Unknown loc info!");
3724  case CCValAssign::Full: break;
3725  case CCValAssign::BCvt:
3726  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3727  break;
3728  case CCValAssign::SExt:
3729  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3730  DAG.getValueType(VA.getValVT()));
3731  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3732  break;
3733  case CCValAssign::ZExt:
3734  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3735  DAG.getValueType(VA.getValVT()));
3736  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3737  break;
3738  }
3739 
3740  InVals.push_back(ArgValue);
3741  } else { // VA.isRegLoc()
3742  // sanity check
3743  assert(VA.isMemLoc());
3744  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3745 
3746  int index = VA.getValNo();
3747 
3748  // Some Ins[] entries become multiple ArgLoc[] entries.
3749  // Process them only once.
3750  if (index != lastInsIndex)
3751  {
3752  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3753  // FIXME: For now, all byval parameter objects are marked mutable.
3754  // This can be changed with more analysis.
3755  // In the case of tail call optimization, mark all arguments mutable, since
3756  // they could be overwritten by the lowering of arguments in the case of a
3757  // tail call.
3758  if (Flags.isByVal()) {
3759  assert(Ins[index].isOrigArg() &&
3760  "Byval arguments cannot be implicit");
3761  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3762 
3763  int FrameIndex = StoreByValRegs(
3764  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3765  VA.getLocMemOffset(), Flags.getByValSize());
3766  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3767  CCInfo.nextInRegsParam();
3768  } else {
3769  unsigned FIOffset = VA.getLocMemOffset();
3770  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3771  FIOffset, true);
3772 
3773  // Create load nodes to retrieve arguments from the stack.
3774  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3775  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3776  MachinePointerInfo::getFixedStack(
3777  DAG.getMachineFunction(), FI)));
3778  }
3779  lastInsIndex = index;
3780  }
3781  }
3782  }
3783 
3784  // varargs
3785  if (isVarArg && MFI.hasVAStart())
3786  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3787  CCInfo.getNextStackOffset(),
3788  TotalArgRegsSaveSize);
3789 
3790  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3791 
3792  return Chain;
3793 }
3794 
3795 /// isFloatingPointZero - Return true if this is +0.0.
3796 static bool isFloatingPointZero(SDValue Op) {
3797  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3798  return CFP->getValueAPF().isPosZero();
3799  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3800  // Maybe this has already been legalized into the constant pool?
3801  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3802  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3803  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3804  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3805  return CFP->getValueAPF().isPosZero();
3806  }
3807  } else if (Op->getOpcode() == ISD::BITCAST &&
3808  Op->getValueType(0) == MVT::f64) {
3809  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3810  // created by LowerConstantFP().
3811  SDValue BitcastOp = Op->getOperand(0);
3812  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3813  isNullConstant(BitcastOp->getOperand(0)))
3814  return true;
3815  }
3816  return false;
3817 }
3818 
3819 /// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
3820 /// the given operands.
3821 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3822  SDValue &ARMcc, SelectionDAG &DAG,
3823  const SDLoc &dl) const {
3824  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3825  unsigned C = RHSC->getZExtValue();
3826  if (!isLegalICmpImmediate(C)) {
3827  // Constant does not fit, try adjusting it by one?
3828  switch (CC) {
3829  default: break;
3830  case ISD::SETLT:
3831  case ISD::SETGE:
3832  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3833  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3834  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3835  }
3836  break;
3837  case ISD::SETULT:
3838  case ISD::SETUGE:
3839  if (C != 0 && isLegalICmpImmediate(C-1)) {
3840  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3841  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3842  }
3843  break;
3844  case ISD::SETLE:
3845  case ISD::SETGT:
3846  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3847  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3848  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3849  }
3850  break;
3851  case ISD::SETULE:
3852  case ISD::SETUGT:
3853  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3854  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3855  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3856  }
3857  break;
3858  }
3859  }
3860  }
3861 
3862  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3863  ARMISD::NodeType CompareType;
3864  switch (CondCode) {
3865  default:
3866  CompareType = ARMISD::CMP;
3867  break;
3868  case ARMCC::EQ:
3869  case ARMCC::NE:
3870  // Uses only Z Flag
3871  CompareType = ARMISD::CMPZ;
3872  break;
3873  }
3874  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3875  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3876 }
3877 
3878 /// Returns an appropriate VFP CMP (fcmp{s|d} + fmstat) for the given operands.
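/// A compare against +0.0 is lowered to the compare-with-zero form (CMPFPw0),
/// so the constant zero never needs to be materialized in a register.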
3879 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3880  SelectionDAG &DAG, const SDLoc &dl,
3881  bool InvalidOnQNaN) const {
3882  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3883  SDValue Cmp;
3884  SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3885  if (!isFloatingPointZero(RHS))
3886  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3887  else
3888  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3889  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3890 }
3891 
3892 /// duplicateCmp - Glue values can have only one use, so this function
3893 /// duplicates a comparison node.
3894 SDValue
3895 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3896  unsigned Opc = Cmp.getOpcode();
3897  SDLoc DL(Cmp);
3898  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3899  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3900 
3901  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3902  Cmp = Cmp.getOperand(0);
3903  Opc = Cmp.getOpcode();
3904  if (Opc == ARMISD::CMPFP)
3905  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3906  Cmp.getOperand(1), Cmp.getOperand(2));
3907  else {
3908  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3909  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3910  Cmp.getOperand(1));
3911  }
3912  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3913 }
3914 
3915 std::pair<SDValue, SDValue>
3916 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3917  SDValue &ARMcc) const {
3918  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3919 
3920  SDValue Value, OverflowCmp;
3921  SDValue LHS = Op.getOperand(0);
3922  SDValue RHS = Op.getOperand(1);
3923  SDLoc dl(Op);
3924 
3925  // FIXME: We are currently always generating CMPs because we don't support
3926  // generating CMN through the backend. This is not as good as the natural
3927  // CMP case because it causes a register dependency and cannot be folded
3928  // later.
3929 
3930  switch (Op.getOpcode()) {
3931  default:
3932  llvm_unreachable("Unknown overflow instruction!");
3933  case ISD::SADDO:
3934  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3935  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3936  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3937  break;
3938  case ISD::UADDO:
3939  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3940  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3941  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3942  break;
3943  case ISD::SSUBO:
3944  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3945  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3946  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3947  break;
3948  case ISD::USUBO:
3949  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3950  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3951  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3952  break;
3953  } // switch (...)
3954 
3955  return std::make_pair(Value, OverflowCmp);
3956 }
3957 
3958 SDValue
3959 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
3960  // Let legalize expand this if it isn't a legal type yet.
3961  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3962  return SDValue();
3963 
3964  SDValue Value, OverflowCmp;
3965  SDValue ARMcc;
3966  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3967  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3968  SDLoc dl(Op);
3969  // We use 0 and 1 as false and true values.
3970  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3971  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3972  EVT VT = Op.getValueType();
3973 
3974  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3975  ARMcc, CCR, OverflowCmp);
3976 
3977  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3978  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3979 }
3980 
3981 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
3982  SelectionDAG &DAG) {
3983  SDLoc DL(BoolCarry);
3984  EVT CarryVT = BoolCarry.getValueType();
3985 
3986  APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
3987  // This converts the boolean value carry into the carry flag by doing
3988  // ARMISD::ADDC Carry, ~0
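 // Adding ~0 (0xFFFFFFFF) to a boolean carry of 1 wraps around and sets the
 // hardware C flag; adding it to 0 leaves the flag clear.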
3989  return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32),
3990  BoolCarry, DAG.getConstant(NegOne, DL, CarryVT));
3991 }
3992 
3993 static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
3994  SelectionDAG &DAG) {
3995  SDLoc DL(Flags);
3996 
3997  // Now convert the carry flag into a boolean carry. We do this
3998  // using ARMISD::ADDE 0, 0, Carry.
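 // ADDE computes 0 + 0 + C, which materializes the C flag as a 0/1 value.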
3999  return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4000  DAG.getConstant(0, DL, MVT::i32),
4001  DAG.getConstant(0, DL, MVT::i32), Flags);
4002 }
4003 
4004 SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4005  SelectionDAG &DAG) const {
4006  // Let legalize expand this if it isn't a legal type yet.
4007  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4008  return SDValue();
4009 
4010  SDValue LHS = Op.getOperand(0);
4011  SDValue RHS = Op.getOperand(1);
4012  SDLoc dl(Op);
4013 
4014  EVT VT = Op.getValueType();
4015  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4016  SDValue Value;
4017  SDValue Overflow;
4018  switch (Op.getOpcode()) {
4019  default:
4020  llvm_unreachable("Unknown overflow instruction!");
4021  case ISD::UADDO:
4022  Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4023  // Convert the carry flag into a boolean value.
4024  Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4025  break;
4026  case ISD::USUBO:
4027  Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4028  // Convert the carry flag into a boolean value.
4029  Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4030  // ARMISD::SUBC returns 0 when we have to borrow, so turn it into an
4031  // overflow value by computing 1 - C.
4032  Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4033  DAG.getConstant(1, dl, MVT::i32), Overflow);
4034  break;
4035  }
4036 
4037  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4038 }
4039 
4040 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
4041  SDValue Cond = Op.getOperand(0);
4042  SDValue SelectTrue = Op.getOperand(1);
4043  SDValue SelectFalse = Op.getOperand(2);
4044  SDLoc dl(Op);
4045  unsigned Opc = Cond.getOpcode();
4046 
4047  if (Cond.getResNo() == 1 &&
4048  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4049  Opc == ISD::USUBO)) {
4050  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4051  return SDValue();
4052 
4053  SDValue Value, OverflowCmp;
4054  SDValue ARMcc;
4055  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4056  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4057  EVT VT = Op.getValueType();
4058 
4059  return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
4060  OverflowCmp, DAG);
4061  }
4062 
4063  // Convert:
4064  //
4065  // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4066  // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4067  //
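 // i.e. when the condition is itself a 0/1 CMOV, fold the select into that
 // CMOV, swapping its arms when the CMOV's constants are inverted.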
4068  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4069  const ConstantSDNode *CMOVTrue =
4070  dyn_cast<ConstantSDNode>(Cond.getOperand(0).getNode());
4071  const ConstantSDNode *CMOVFalse =
4072  dyn_cast<ConstantSDNode>(Cond.getOperand(1).getNode());
4073 
4074  if (CMOVTrue && CMOVFalse) {
4075  unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4076  unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4077 
4078  SDValue True;
4079  SDValue False;
4080  if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4081  True = SelectTrue;
4082  False = SelectFalse;
4083  } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4084  True = SelectFalse;
4085  False = SelectTrue;
4086  }
4087 
4088  if (True.getNode() && False.getNode()) {
4089  EVT VT = Op.getValueType();
4090  SDValue ARMcc = Cond.g