LLVM  7.0.0svn
ARMISelLowering.cpp
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that ARM uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARMISelLowering.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMMachineFunctionInfo.h"
21 #include "ARMPerfectShuffle.h"
22 #include "ARMRegisterInfo.h"
23 #include "ARMSelectionDAGInfo.h"
24 #include "ARMSubtarget.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/CallingConv.h"
68 #include "llvm/IR/Constant.h"
69 #include "llvm/IR/Constants.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugLoc.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Function.h"
74 #include "llvm/IR/GlobalAlias.h"
75 #include "llvm/IR/GlobalValue.h"
76 #include "llvm/IR/GlobalVariable.h"
77 #include "llvm/IR/IRBuilder.h"
78 #include "llvm/IR/InlineAsm.h"
79 #include "llvm/IR/Instruction.h"
80 #include "llvm/IR/Instructions.h"
81 #include "llvm/IR/IntrinsicInst.h"
82 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/Module.h"
84 #include "llvm/IR/Type.h"
85 #include "llvm/IR/User.h"
86 #include "llvm/IR/Value.h"
87 #include "llvm/MC/MCInstrDesc.h"
89 #include "llvm/MC/MCRegisterInfo.h"
90 #include "llvm/MC/MCSchedule.h"
93 #include "llvm/Support/Casting.h"
94 #include "llvm/Support/CodeGen.h"
96 #include "llvm/Support/Compiler.h"
97 #include "llvm/Support/Debug.h"
99 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 
117 #define DEBUG_TYPE "arm-isel"
118 
119 STATISTIC(NumTailCalls, "Number of tail calls");
120 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
121 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
122 STATISTIC(NumConstpoolPromoted,
123  "Number of constants with their storage promoted into constant pools");
124 
125 static cl::opt<bool>
126 ARMInterworking("arm-interworking", cl::Hidden,
127  cl::desc("Enable / disable ARM interworking (for debugging only)"),
128  cl::init(true));
129 
130 static cl::opt<bool> EnableConstpoolPromotion(
131  "arm-promote-constant", cl::Hidden,
132  cl::desc("Enable / disable promotion of unnamed_addr constants into "
133  "constant pools"),
134  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
135 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
136  "arm-promote-constant-max-size", cl::Hidden,
137  cl::desc("Maximum size of constant to promote into a constant pool"),
138  cl::init(64));
139 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
140  "arm-promote-constant-max-total", cl::Hidden,
141  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
142  cl::init(128));
143 
144 // The APCS parameter registers.
145 static const MCPhysReg GPRArgRegs[] = {
146  ARM::R0, ARM::R1, ARM::R2, ARM::R3
147 };
148 
149 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
150  MVT PromotedBitwiseVT) {
151  if (VT != PromotedLdStVT) {
153  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
154 
156  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
157  }
158 
159  MVT ElemTy = VT.getVectorElementType();
160  if (ElemTy != MVT::f64)
164  if (ElemTy == MVT::i32) {
169  } else {
174  }
183  if (VT.isInteger()) {
187  }
188 
189  // Promote all bit-wise operations.
190  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
197  }
198 
199  // Neon does not support vector divide/remainder operations.
206 
207  if (!VT.isFloatingPoint() &&
208  VT != MVT::v2i64 && VT != MVT::v1i64)
209  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210  setOperationAction(Opcode, VT, Legal);
211 }
212 
213 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
214  addRegisterClass(VT, &ARM::DPRRegClass);
215  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
216 }
217 
218 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
219  addRegisterClass(VT, &ARM::DPairRegClass);
220  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
221 }
222 
223 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
224  const ARMSubtarget &STI)
225  : TargetLowering(TM), Subtarget(&STI) {
226  RegInfo = Subtarget->getRegisterInfo();
227  Itins = Subtarget->getInstrItineraryData();
228 
231 
232  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
233  !Subtarget->isTargetWatchOS()) {
234  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
235  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
236  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
237  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
238  : CallingConv::ARM_AAPCS);
239  }
240 
241  if (Subtarget->isTargetMachO()) {
242  // Uses VFP for Thumb libfuncs if available.
243  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
244  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
245  static const struct {
246  const RTLIB::Libcall Op;
247  const char * const Name;
248  const ISD::CondCode Cond;
249  } LibraryCalls[] = {
250  // Single-precision floating-point arithmetic.
251  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
252  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
253  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
254  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
255 
256  // Double-precision floating-point arithmetic.
257  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
258  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
259  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
260  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
261 
262  // Single-precision comparisons.
263  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
264  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
265  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
266  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
267  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
268  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
269  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
270  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
271 
272  // Double-precision comparisons.
273  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
274  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
275  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
276  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
277  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
278  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
279  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
280  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
281 
282  // Floating-point to integer conversions.
283  // i64 conversions are done via library routines even when generating VFP
284  // instructions, so use the same ones.
285  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
286  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
287  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
288  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
289 
290  // Conversions between floating types.
291  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
292  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
293 
294  // Integer to floating-point conversions.
295  // i64 conversions are done via library routines even when generating VFP
296  // instructions, so use the same ones.
297  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
298  // e.g., __floatunsidf vs. __floatunssidfvfp.
299  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
300  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
301  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
302  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
303  };
304 
305  for (const auto &LC : LibraryCalls) {
306  setLibcallName(LC.Op, LC.Name);
307  if (LC.Cond != ISD::SETCC_INVALID)
308  setCmpLibcallCC(LC.Op, LC.Cond);
309  }
310  }
311 
312  // Set the correct calling convention for ARMv7k WatchOS. It's just
313  // AAPCS_VFP for functions as simple as libcalls.
314  if (Subtarget->isTargetWatchABI()) {
315  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
316  setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
317  }
318  }
319 
320  // These libcalls are not available in 32-bit.
321  setLibcallName(RTLIB::SHL_I128, nullptr);
322  setLibcallName(RTLIB::SRL_I128, nullptr);
323  setLibcallName(RTLIB::SRA_I128, nullptr);
324 
325  // RTLIB
326  if (Subtarget->isAAPCS_ABI() &&
327  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
328  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
329  static const struct {
330  const RTLIB::Libcall Op;
331  const char * const Name;
332  const CallingConv::ID CC;
333  const ISD::CondCode Cond;
334  } LibraryCalls[] = {
335  // Double-precision floating-point arithmetic helper functions
336  // RTABI chapter 4.1.2, Table 2
337  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
338  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
339  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341 
342  // Double-precision floating-point comparison helper functions
343  // RTABI chapter 4.1.2, Table 3
344  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
345  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
346  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
347  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
348  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
349  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
350  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
351  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
352 
353  // Single-precision floating-point arithmetic helper functions
354  // RTABI chapter 4.1.2, Table 4
355  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
356  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
357  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
358  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359 
360  // Single-precision floating-point comparison helper functions
361  // RTABI chapter 4.1.2, Table 5
362  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
363  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
364  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
365  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
366  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
367  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
368  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
369  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
370 
371  // Floating-point to integer conversions.
372  // RTABI chapter 4.1.2, Table 6
373  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
374  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
375  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
376  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381 
382  // Conversions between floating types.
383  // RTABI chapter 4.1.2, Table 7
384  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387 
388  // Integer to floating-point conversions.
389  // RTABI chapter 4.1.2, Table 8
390  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
391  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
392  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 
399  // Long long helper functions
400  // RTABI chapter 4.2, Table 9
401  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
402  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405 
406  // Integer division functions
407  // RTABI chapter 4.3.1
408  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
409  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
410  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416  };
417 
418  for (const auto &LC : LibraryCalls) {
419  setLibcallName(LC.Op, LC.Name);
420  setLibcallCallingConv(LC.Op, LC.CC);
421  if (LC.Cond != ISD::SETCC_INVALID)
422  setCmpLibcallCC(LC.Op, LC.Cond);
423  }
424 
425  // EABI dependent RTLIB
426  if (TM.Options.EABIVersion == EABI::EABI4 ||
427  TM.Options.EABIVersion == EABI::EABI5) {
428  static const struct {
429  const RTLIB::Libcall Op;
430  const char *const Name;
431  const CallingConv::ID CC;
432  const ISD::CondCode Cond;
433  } MemOpsLibraryCalls[] = {
434  // Memory operations
435  // RTABI chapter 4.3.4
436  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
437  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
438  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
439  };
440 
441  for (const auto &LC : MemOpsLibraryCalls) {
442  setLibcallName(LC.Op, LC.Name);
443  setLibcallCallingConv(LC.Op, LC.CC);
444  if (LC.Cond != ISD::SETCC_INVALID)
445  setCmpLibcallCC(LC.Op, LC.Cond);
446  }
447  }
448  }
449 
450  if (Subtarget->isTargetWindows()) {
451  static const struct {
452  const RTLIB::Libcall Op;
453  const char * const Name;
454  const CallingConv::ID CC;
455  } LibraryCalls[] = {
456  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
457  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
458  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
459  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
460  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
461  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
462  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
463  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
464  };
465 
466  for (const auto &LC : LibraryCalls) {
467  setLibcallName(LC.Op, LC.Name);
468  setLibcallCallingConv(LC.Op, LC.CC);
469  }
470  }
471 
472  // Use divmod compiler-rt calls for iOS 5.0 and later.
473  if (Subtarget->isTargetMachO() &&
474  !(Subtarget->isTargetIOS() &&
475  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
476  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
477  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
478  }
479 
480  // The half <-> float conversion functions are always soft-float on
481  // non-WatchOS platforms, but are needed for some targets which use a
482  // hard-float calling convention by default.
483  if (!Subtarget->isTargetWatchABI()) {
484  if (Subtarget->isAAPCS_ABI()) {
485  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
486  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
487  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
488  } else {
489  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
490  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
491  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
492  }
493  }
494 
495  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
496  // a __gnu_ prefix (which is the default).
497  if (Subtarget->isTargetAEABI()) {
498  static const struct {
499  const RTLIB::Libcall Op;
500  const char * const Name;
501  const CallingConv::ID CC;
502  } LibraryCalls[] = {
503  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
504  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
505  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
506  };
507 
508  for (const auto &LC : LibraryCalls) {
509  setLibcallName(LC.Op, LC.Name);
510  setLibcallCallingConv(LC.Op, LC.CC);
511  }
512  }
513 
514  if (Subtarget->isThumb1Only())
515  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
516  else
517  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
518 
519  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
520  !Subtarget->isThumb1Only()) {
521  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
522  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
523  }
524 
525  if (Subtarget->hasFullFP16()) {
526  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
530  }
531 
532  for (MVT VT : MVT::vector_valuetypes()) {
533  for (MVT InnerVT : MVT::vector_valuetypes()) {
534  setTruncStoreAction(VT, InnerVT, Expand);
535  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
536  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
537  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
538  }
539 
544 
546  }
547 
550 
553 
554  if (Subtarget->hasNEON()) {
555  addDRTypeForNEON(MVT::v2f32);
556  addDRTypeForNEON(MVT::v8i8);
557  addDRTypeForNEON(MVT::v4i16);
558  addDRTypeForNEON(MVT::v2i32);
559  addDRTypeForNEON(MVT::v1i64);
560 
561  addQRTypeForNEON(MVT::v4f32);
562  addQRTypeForNEON(MVT::v2f64);
563  addQRTypeForNEON(MVT::v16i8);
564  addQRTypeForNEON(MVT::v8i16);
565  addQRTypeForNEON(MVT::v4i32);
566  addQRTypeForNEON(MVT::v2i64);
567 
568  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
569  // neither Neon nor VFP support any arithmetic operations on it.
570  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
571  // supported for v4f32.
575  // FIXME: Code duplication: FDIV and FREM are expanded always, see
576  // ARMTargetLowering::addTypeForNEON method for details.
579  // FIXME: Create unittest.
580  // In other words, find a way to handle "copysign" when it appears in a DAG
581  // with vector operands.
583  // FIXME: Code duplication: SETCC has custom operation action, see
584  // ARMTargetLowering::addTypeForNEON method for details.
586  // FIXME: Create unittest for FNEG and for FABS.
598  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
605 
620 
621  // Mark v2f32 intrinsics.
636 
637  // Neon does not support some operations on v1i64 and v2i64 types.
639  // Custom handling for some quad-vector types to detect VMULL.
643  // Custom handling for some vector types to avoid expensive expansions
648  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
649  // a destination type that is wider than the source, nor does
650  // it have a FP_TO_[SU]INT instruction with a narrower destination than
651  // source.
656 
659 
660  // NEON does not have single instruction CTPOP for vectors with element
661  // types wider than 8-bits. However, custom lowering can leverage the
662  // v8i8/v16i8 vcnt instruction.
669 
672 
673  // NEON does not have single instruction CTTZ for vectors.
678 
683 
688 
693 
694  // NEON only has FMA instructions as of VFP4.
695  if (!Subtarget->hasVFP4()) {
698  }
699 
717 
718  // It is legal to extload from v4i8 to v4i16 or v4i32.
720  MVT::v2i32}) {
721  for (MVT VT : MVT::integer_vector_valuetypes()) {
725  }
726  }
727  }
728 
729  if (Subtarget->isFPOnlySP()) {
730  // When targeting a floating-point unit with only single-precision
731  // operations, f64 is legal for the few double-precision instructions which
732  // are present. However, no double-precision operations other than moves,
733  // loads and stores are provided by the hardware.
766  }
767 
769 
770  // ARM does not have floating-point extending loads.
771  for (MVT VT : MVT::fp_valuetypes()) {
774  }
775 
776  // ... or truncating stores
780 
781  // ARM does not have i1 sign extending load.
782  for (MVT VT : MVT::integer_valuetypes())
784 
785  // ARM supports all 4 flavors of integer indexed load / store.
786  if (!Subtarget->isThumb1Only()) {
787  for (unsigned im = (unsigned)ISD::PRE_INC;
797  }
798  } else {
799  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
802  }
803 
808 
811 
812  // i64 operation support.
815  if (Subtarget->isThumb1Only()) {
818  }
819  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
820  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
822 
829 
830  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
831  if (Subtarget->isThumb1Only()) {
835  }
836 
841 
842  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
844 
845  // ARM does not have ROTL.
847  for (MVT VT : MVT::vector_valuetypes()) {
850  }
853  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
855 
856  // @llvm.readcyclecounter requires the Performance Monitors extension.
857  // Default to the 0 expansion on unsupported platforms.
858  // FIXME: Technically there are older ARM CPUs that have
859  // implementation-specific ways of obtaining this information.
860  if (Subtarget->hasPerfMon())
862 
863  // Only ARMv6 has BSWAP.
864  if (!Subtarget->hasV6Ops())
866 
867  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
868  : Subtarget->hasDivideInARMMode();
869  if (!hasDivide) {
870  // These are expanded into libcalls if the cpu doesn't have HW divider.
873  }
874 
875  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
878 
881  }
882 
885 
886  // Register based DivRem for AEABI (RTABI 4.2)
887  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
888  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
889  Subtarget->isTargetWindows()) {
892  HasStandaloneRem = false;
893 
894  if (Subtarget->isTargetWindows()) {
895  const struct {
896  const RTLIB::Libcall Op;
897  const char * const Name;
898  const CallingConv::ID CC;
899  } LibraryCalls[] = {
900  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
901  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
902  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
903  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
904 
905  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
906  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
907  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
908  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
909  };
910 
911  for (const auto &LC : LibraryCalls) {
912  setLibcallName(LC.Op, LC.Name);
913  setLibcallCallingConv(LC.Op, LC.CC);
914  }
915  } else {
916  const struct {
917  const RTLIB::Libcall Op;
918  const char * const Name;
919  const CallingConv::ID CC;
920  } LibraryCalls[] = {
921  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
922  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
923  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
924  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
925 
926  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
927  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
928  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
929  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
930  };
931 
932  for (const auto &LC : LibraryCalls) {
933  setLibcallName(LC.Op, LC.Name);
934  setLibcallCallingConv(LC.Op, LC.CC);
935  }
936  }
937 
942  } else {
945  }
946 
947  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
948  for (auto &VT : {MVT::f32, MVT::f64})
950 
955 
957 
958  // Use the default implementation.
965 
966  if (Subtarget->isTargetWindows())
968  else
970 
971  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
972  // the default expansion.
973  InsertFencesForAtomic = false;
974  if (Subtarget->hasAnyDataBarrier() &&
975  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
976  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
977  // to ldrex/strex loops already.
979  if (!Subtarget->isThumb() || !Subtarget->isMClass())
981 
982  // On v8, we have particularly efficient implementations of atomic fences
983  // if they can be combined with nearby atomic loads and stores.
984  if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
985  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
986  InsertFencesForAtomic = true;
987  }
988  } else {
989  // If there's anything we can use as a barrier, go through custom lowering
990  // for ATOMIC_FENCE.
991  // If target has DMB in thumb, Fences can be inserted.
992  if (Subtarget->hasDataBarrier())
993  InsertFencesForAtomic = true;
994 
996  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
997 
998  // Set them all for expansion, which will force libcalls.
1011  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1012  // Unordered/Monotonic case.
1013  if (!InsertFencesForAtomic) {
1016  }
1017  }
1018 
1020 
1021  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1022  if (!Subtarget->hasV6Ops()) {
1025  }
1027 
1028  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1029  !Subtarget->isThumb1Only()) {
1030  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1031  // iff target supports vfp2.
1034  }
1035 
1036  // We want to custom lower some of our intrinsics.
1041  if (Subtarget->useSjLjEH())
1042  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1043 
1054 
1055  // Thumb-1 cannot currently select ARMISD::SUBE.
1056  if (!Subtarget->isThumb1Only())
1058 
1064 
1065  // We don't support sin/cos/fmod/copysign/pow
1074  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1075  !Subtarget->isThumb1Only()) {
1078  }
1081 
1082  if (!Subtarget->hasVFP4()) {
1085  }
1086 
1087  // Various VFP goodness
1088  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1089  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1090  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1093  }
1094 
1095  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1096  if (!Subtarget->hasFP16()) {
1099  }
1100  }
1101 
1102  // Use __sincos_stret if available.
1103  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1104  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1107  }
1108 
1109  // FP-ARMv8 implements a lot of rounding-like FP operations.
1110  if (Subtarget->hasFPARMv8()) {
1123 
1124  if (!Subtarget->isFPOnlySP()) {
1133  }
1134  }
1135 
1136  if (Subtarget->hasNEON()) {
1137  // vmin and vmax aren't available in a scalar form, so we use
1138  // a NEON instruction with an undef lane instead.
1145  }
1146 
1147  // We have target-specific dag combine patterns for the following nodes:
1148  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1155 
1156  if (Subtarget->hasV6Ops())
1158 
1160 
1161  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1162  !Subtarget->hasVFP2())
1164  else
1166 
1167  //// temporary - rewrite interface to use type
1168  MaxStoresPerMemset = 8;
1170  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1172  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1174 
1175  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1176  // are at least 4 bytes aligned.
1178 
1179  // Prefer likely predicted branches to selects on out-of-order cores.
1180  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1181 
1182  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1183 }
1184 
1185 bool ARMTargetLowering::useSoftFloat() const {
1186  return Subtarget->useSoftFloat();
1187 }
1188 
1189 // FIXME: It might make sense to define the representative register class as the
1190 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1191 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1192 // SPR's representative would be DPR_VFP2. This should work well if register
1193 // pressure tracking were modified such that a register use would increment the
1194 // pressure of the register class's representative and all of its super
1195 // classes' representatives transitively. We have not implemented this because
1196 // of the difficulty prior to coalescing of modeling operand register classes
1197 // due to the common occurrence of cross class copies and subregister insertions
1198 // and extractions.
1199 std::pair<const TargetRegisterClass *, uint8_t>
1200 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1201  MVT VT) const {
1202  const TargetRegisterClass *RRC = nullptr;
1203  uint8_t Cost = 1;
1204  switch (VT.SimpleTy) {
1205  default:
1206  return TargetLowering::findRepresentativeClass(TRI, VT);
1207  // Use DPR as representative register class for all floating point
1208  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1209  // the cost is 1 for both f32 and f64.
1210  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1211  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1212  RRC = &ARM::DPRRegClass;
1213  // When NEON is used for SP, only half of the register file is available
1214  // because operations that define both SP and DP results will be constrained
1215  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1216  // coalescing by double-counting the SP regs. See the FIXME above.
1217  if (Subtarget->useNEONForSinglePrecisionFP())
1218  Cost = 2;
1219  break;
1220  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1221  case MVT::v4f32: case MVT::v2f64:
1222  RRC = &ARM::DPRRegClass;
1223  Cost = 2;
1224  break;
1225  case MVT::v4i64:
1226  RRC = &ARM::DPRRegClass;
1227  Cost = 4;
1228  break;
1229  case MVT::v8i64:
1230  RRC = &ARM::DPRRegClass;
1231  Cost = 8;
1232  break;
1233  }
1234  return std::make_pair(RRC, Cost);
1235 }
1236 
1237 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1238  switch ((ARMISD::NodeType)Opcode) {
1239  case ARMISD::FIRST_NUMBER: break;
1240  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1241  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1242  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1243  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1244  case ARMISD::CALL: return "ARMISD::CALL";
1245  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1246  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1247  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1248  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1249  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1250  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1251  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1252  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1253  case ARMISD::CMP: return "ARMISD::CMP";
1254  case ARMISD::CMN: return "ARMISD::CMN";
1255  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1256  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1257  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1258  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1259  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1260 
1261  case ARMISD::CMOV: return "ARMISD::CMOV";
1262 
1263  case ARMISD::SSAT: return "ARMISD::SSAT";
1264  case ARMISD::USAT: return "ARMISD::USAT";
1265 
1266  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1267  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1268  case ARMISD::RRX: return "ARMISD::RRX";
1269 
1270  case ARMISD::ADDC: return "ARMISD::ADDC";
1271  case ARMISD::ADDE: return "ARMISD::ADDE";
1272  case ARMISD::SUBC: return "ARMISD::SUBC";
1273  case ARMISD::SUBE: return "ARMISD::SUBE";
1274 
1275  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1276  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1277  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1278  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1279 
1280  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1281  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1282  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1283 
1284  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1285 
1286  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1287 
1288  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1289 
1290  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1291 
1292  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1293 
1294  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1295  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1296 
1297  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1298  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1299  case ARMISD::VCGE: return "ARMISD::VCGE";
1300  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1301  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1302  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1303  case ARMISD::VCGT: return "ARMISD::VCGT";
1304  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1305  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1306  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1307  case ARMISD::VTST: return "ARMISD::VTST";
1308 
1309  case ARMISD::VSHL: return "ARMISD::VSHL";
1310  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1311  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1312  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1313  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1314  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1315  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1316  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1317  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1318  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1319  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1320  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1321  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1322  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1323  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1324  case ARMISD::VSLI: return "ARMISD::VSLI";
1325  case ARMISD::VSRI: return "ARMISD::VSRI";
1326  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1327  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1328  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1329  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1330  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1331  case ARMISD::VDUP: return "ARMISD::VDUP";
1332  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1333  case ARMISD::VEXT: return "ARMISD::VEXT";
1334  case ARMISD::VREV64: return "ARMISD::VREV64";
1335  case ARMISD::VREV32: return "ARMISD::VREV32";
1336  case ARMISD::VREV16: return "ARMISD::VREV16";
1337  case ARMISD::VZIP: return "ARMISD::VZIP";
1338  case ARMISD::VUZP: return "ARMISD::VUZP";
1339  case ARMISD::VTRN: return "ARMISD::VTRN";
1340  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1341  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1342  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1343  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1344  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1345  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1346  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1347  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1348  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1349  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1350  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1351  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1352  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1353  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1354  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1355  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1356  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1357  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1358  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1359  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1360  case ARMISD::BFI: return "ARMISD::BFI";
1361  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1362  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1363  case ARMISD::VBSL: return "ARMISD::VBSL";
1364  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1365  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1366  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1367  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1368  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1369  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1370  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1371  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1372  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1373  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1374  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1375  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1376  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1377  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1378  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1379  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1380  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1381  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1382  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1383  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1384  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1385  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1386  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1387  }
1388  return nullptr;
1389 }
1390 
1391 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1392  EVT VT) const {
1393  if (!VT.isVector())
1394  return getPointerTy(DL);
1395  return VT.changeVectorElementTypeToInteger();
1396 }
1397 
1398 /// getRegClassFor - Return the register class that should be used for the
1399 /// specified value type.
1400 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1401  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1402  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1403  // load / store 4 to 8 consecutive D registers.
1404  if (Subtarget->hasNEON()) {
1405  if (VT == MVT::v4i64)
1406  return &ARM::QQPRRegClass;
1407  if (VT == MVT::v8i64)
1408  return &ARM::QQQQPRRegClass;
1409  }
1410  return TargetLowering::getRegClassFor(VT);
1411 }
1412 
1413 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1414 // source/dest is aligned and the copy size is large enough. We therefore want
1415 // to align such objects passed to memory intrinsics.
1416 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1417  unsigned &PrefAlign) const {
1418  if (!isa<MemIntrinsic>(CI))
1419  return false;
1420  MinSize = 8;
1421  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1422  // cycle faster than 4-byte aligned LDM.
1423  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1424  return true;
1425 }
1426 
1427 // Create a fast isel object.
1428 FastISel *
1429 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1430  const TargetLibraryInfo *libInfo) const {
1431  return ARM::createFastISel(funcInfo, libInfo);
1432 }
1433 
1434 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1435  unsigned NumVals = N->getNumValues();
1436  if (!NumVals)
1437  return Sched::RegPressure;
1438 
1439  for (unsigned i = 0; i != NumVals; ++i) {
1440  EVT VT = N->getValueType(i);
1441  if (VT == MVT::Glue || VT == MVT::Other)
1442  continue;
1443  if (VT.isFloatingPoint() || VT.isVector())
1444  return Sched::ILP;
1445  }
1446 
1447  if (!N->isMachineOpcode())
1448  return Sched::RegPressure;
1449 
1450  // Loads are scheduled for latency even if the instruction itinerary
1451  // is not available.
1452  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1453  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1454 
1455  if (MCID.getNumDefs() == 0)
1456  return Sched::RegPressure;
1457  if (!Itins->isEmpty() &&
1458  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1459  return Sched::ILP;
1460 
1461  return Sched::RegPressure;
1462 }
1463 
1464 //===----------------------------------------------------------------------===//
1465 // Lowering Code
1466 //===----------------------------------------------------------------------===//
1467 
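// Helper predicates for recognizing shift-by-16 nodes; they are used later in
// this file when matching 16-bit (halfword) multiply patterns.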
1468 static bool isSRL16(const SDValue &Op) {
1469  if (Op.getOpcode() != ISD::SRL)
1470  return false;
1471  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1472  return Const->getZExtValue() == 16;
1473  return false;
1474 }
1475 
1476 static bool isSRA16(const SDValue &Op) {
1477  if (Op.getOpcode() != ISD::SRA)
1478  return false;
1479  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1480  return Const->getZExtValue() == 16;
1481  return false;
1482 }
1483 
1484 static bool isSHL16(const SDValue &Op) {
1485  if (Op.getOpcode() != ISD::SHL)
1486  return false;
1487  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1488  return Const->getZExtValue() == 16;
1489  return false;
1490 }
1491 
1492 // Check for a signed 16-bit value. We special case SRA because it makes it
1493 // simpler when also looking for SRAs that aren't sign extending a
1494 // smaller value. Without the check, we'd need to take extra care with
1495 // checking order for some operations.
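// Note that ComputeNumSignBits(Op) == 17 on an i32 value means the top 17 bits
// are all copies of the sign bit, i.e. the value is already a sign-extended i16.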
1496 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1497  if (isSRA16(Op))
1498  return isSHL16(Op.getOperand(0));
1499  return DAG.ComputeNumSignBits(Op) == 17;
1500 }
1501 
1502 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1503 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1504  switch (CC) {
1505  default: llvm_unreachable("Unknown condition code!");
1506  case ISD::SETNE: return ARMCC::NE;
1507  case ISD::SETEQ: return ARMCC::EQ;
1508  case ISD::SETGT: return ARMCC::GT;
1509  case ISD::SETGE: return ARMCC::GE;
1510  case ISD::SETLT: return ARMCC::LT;
1511  case ISD::SETLE: return ARMCC::LE;
1512  case ISD::SETUGT: return ARMCC::HI;
1513  case ISD::SETUGE: return ARMCC::HS;
1514  case ISD::SETULT: return ARMCC::LO;
1515  case ISD::SETULE: return ARMCC::LS;
1516  }
1517 }
1518 
1519 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
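/// Some conditions (e.g. SETONE, SETUEQ) cannot be expressed as a single ARM
/// condition code; for those a second code is returned in CondCode2 (ARMCC::AL
/// when unused) and the caller predicates two instructions.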
1520 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1521  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1522  CondCode2 = ARMCC::AL;
1523  InvalidOnQNaN = true;
1524  switch (CC) {
1525  default: llvm_unreachable("Unknown FP condition!");
1526  case ISD::SETEQ:
1527  case ISD::SETOEQ:
1528  CondCode = ARMCC::EQ;
1529  InvalidOnQNaN = false;
1530  break;
1531  case ISD::SETGT:
1532  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1533  case ISD::SETGE:
1534  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1535  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1536  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1537  case ISD::SETONE:
1538  CondCode = ARMCC::MI;
1539  CondCode2 = ARMCC::GT;
1540  InvalidOnQNaN = false;
1541  break;
1542  case ISD::SETO: CondCode = ARMCC::VC; break;
1543  case ISD::SETUO: CondCode = ARMCC::VS; break;
1544  case ISD::SETUEQ:
1545  CondCode = ARMCC::EQ;
1546  CondCode2 = ARMCC::VS;
1547  InvalidOnQNaN = false;
1548  break;
1549  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1550  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1551  case ISD::SETLT:
1552  case ISD::SETULT: CondCode = ARMCC::LT; break;
1553  case ISD::SETLE:
1554  case ISD::SETULE: CondCode = ARMCC::LE; break;
1555  case ISD::SETNE:
1556  case ISD::SETUNE:
1557  CondCode = ARMCC::NE;
1558  InvalidOnQNaN = false;
1559  break;
1560  }
1561 }
1562 
1563 //===----------------------------------------------------------------------===//
1564 // Calling Convention Implementation
1565 //===----------------------------------------------------------------------===//
1566 
1567 #include "ARMGenCallingConv.inc"
1568 
1569 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1570 /// account presence of floating point hardware and calling convention
1571 /// limitations, such as support for variadic functions.
1572 CallingConv::ID
1573 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1574  bool isVarArg) const {
1575  switch (CC) {
1576  default:
1577  report_fatal_error("Unsupported calling convention");
1579  case CallingConv::ARM_APCS:
1580  case CallingConv::GHC:
1581  return CC;
1585  case CallingConv::Swift:
1587  case CallingConv::C:
1588  if (!Subtarget->isAAPCS_ABI())
1589  return CallingConv::ARM_APCS;
1590  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1592  !isVarArg)
1593  return CallingConv::ARM_AAPCS_VFP;
1594  else
1595  return CallingConv::ARM_AAPCS;
1596  case CallingConv::Fast:
1598  if (!Subtarget->isAAPCS_ABI()) {
1599  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1600  return CallingConv::Fast;
1601  return CallingConv::ARM_APCS;
1602  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1603  return CallingConv::ARM_AAPCS_VFP;
1604  else
1605  return CallingConv::ARM_AAPCS;
1606  }
1607 }
1608 
1609 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1610  bool isVarArg) const {
1611  return CCAssignFnForNode(CC, false, isVarArg);
1612 }
1613 
1614 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1615  bool isVarArg) const {
1616  return CCAssignFnForNode(CC, true, isVarArg);
1617 }
1618 
1619 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1620 /// CallingConvention.
1621 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1622  bool Return,
1623  bool isVarArg) const {
1624  switch (getEffectiveCallingConv(CC, isVarArg)) {
1625  default:
1626  report_fatal_error("Unsupported calling convention");
1627  case CallingConv::ARM_APCS:
1628  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1629  case CallingConv::ARM_AAPCS:
1630  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1631  case CallingConv::ARM_AAPCS_VFP:
1632  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1633  case CallingConv::Fast:
1634  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1635  case CallingConv::GHC:
1636  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1638  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1639  }
1640 }
1641 
1642 /// LowerCallResult - Lower the result values of a call into the
1643 /// appropriate copies out of appropriate physical registers.
1644 SDValue ARMTargetLowering::LowerCallResult(
1645  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1646  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1647  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1648  SDValue ThisVal) const {
1649  // Assign locations to each value returned by this call.
1650  SmallVector<CCValAssign, 16> RVLocs;
1651  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1652  *DAG.getContext());
1653  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1654 
1655  // Copy all of the result registers out of their specified physreg.
1656  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1657  CCValAssign VA = RVLocs[i];
1658 
1659  // Pass 'this' value directly from the argument to return value, to avoid
1660  // reg unit interference
1661  if (i == 0 && isThisReturn) {
1662  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1663  "unexpected return calling convention register assignment");
1664  InVals.push_back(ThisVal);
1665  continue;
1666  }
1667 
1668  SDValue Val;
1669  if (VA.needsCustom()) {
1670  // Handle f64 or half of a v2f64.
1671  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1672  InFlag);
1673  Chain = Lo.getValue(1);
1674  InFlag = Lo.getValue(2);
1675  VA = RVLocs[++i]; // skip ahead to next loc
1676  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1677  InFlag);
1678  Chain = Hi.getValue(1);
1679  InFlag = Hi.getValue(2);
1680  if (!Subtarget->isLittle())
1681  std::swap (Lo, Hi);
1682  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1683 
1684  if (VA.getLocVT() == MVT::v2f64) {
1685  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1686  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1687  DAG.getConstant(0, dl, MVT::i32));
1688 
1689  VA = RVLocs[++i]; // skip ahead to next loc
1690  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1691  Chain = Lo.getValue(1);
1692  InFlag = Lo.getValue(2);
1693  VA = RVLocs[++i]; // skip ahead to next loc
1694  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1695  Chain = Hi.getValue(1);
1696  InFlag = Hi.getValue(2);
1697  if (!Subtarget->isLittle())
1698  std::swap (Lo, Hi);
1699  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1700  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1701  DAG.getConstant(1, dl, MVT::i32));
1702  }
1703  } else {
1704  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1705  InFlag);
1706  Chain = Val.getValue(1);
1707  InFlag = Val.getValue(2);
1708  }
1709 
1710  switch (VA.getLocInfo()) {
1711  default: llvm_unreachable("Unknown loc info!");
1712  case CCValAssign::Full: break;
1713  case CCValAssign::BCvt:
1714  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1715  break;
1716  }
1717 
1718  InVals.push_back(Val);
1719  }
1720 
1721  return Chain;
1722 }
1723 
1724 /// LowerMemOpCallTo - Store the argument to the stack.
1725 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1726  SDValue Arg, const SDLoc &dl,
1727  SelectionDAG &DAG,
1728  const CCValAssign &VA,
1729  ISD::ArgFlagsTy Flags) const {
1730  unsigned LocMemOffset = VA.getLocMemOffset();
1731  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1732  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1733  StackPtr, PtrOff);
1734  return DAG.getStore(
1735  Chain, dl, Arg, PtrOff,
1736  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1737 }
1738 
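/// PassF64ArgInRegs - Split an f64 value (or one half of a v2f64) into two i32
/// halves with VMOVRRD and pass them in the locations described by VA and
/// NextVA (register and/or stack slot).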
1739 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1740  SDValue Chain, SDValue &Arg,
1741  RegsToPassVector &RegsToPass,
1742  CCValAssign &VA, CCValAssign &NextVA,
1743  SDValue &StackPtr,
1744  SmallVectorImpl<SDValue> &MemOpChains,
1745  ISD::ArgFlagsTy Flags) const {
1746  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1747  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1748  unsigned id = Subtarget->isLittle() ? 0 : 1;
1749  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1750 
1751  if (NextVA.isRegLoc())
1752  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1753  else {
1754  assert(NextVA.isMemLoc());
1755  if (!StackPtr.getNode())
1756  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1757  getPointerTy(DAG.getDataLayout()));
1758 
1759  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1760  dl, DAG, NextVA,
1761  Flags));
1762  }
1763 }
1764 
1765 /// LowerCall - Lowering a call into a callseq_start <-
1766 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1767 /// nodes.
1768 SDValue
1769 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1770  SmallVectorImpl<SDValue> &InVals) const {
1771  SelectionDAG &DAG = CLI.DAG;
1772  SDLoc &dl = CLI.DL;
1773  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1774  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1775  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1776  SDValue Chain = CLI.Chain;
1777  SDValue Callee = CLI.Callee;
1778  bool &isTailCall = CLI.IsTailCall;
1779  CallingConv::ID CallConv = CLI.CallConv;
1780  bool doesNotRet = CLI.DoesNotReturn;
1781  bool isVarArg = CLI.IsVarArg;
1782 
1783  MachineFunction &MF = DAG.getMachineFunction();
1784  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1785  bool isThisReturn = false;
1786  bool isSibCall = false;
1787  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
1788 
1789  // Disable tail calls if they're not supported.
1790  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1791  isTailCall = false;
1792 
1793  if (isTailCall) {
1794  // Check if it's really possible to do a tail call.
1795  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1796  isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
1797  Outs, OutVals, Ins, DAG);
1798  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1799  report_fatal_error("failed to perform tail call elimination on a call "
1800  "site marked musttail");
1801  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1802  // detected sibcalls.
1803  if (isTailCall) {
1804  ++NumTailCalls;
1805  isSibCall = true;
1806  }
1807  }
1808 
1809  // Analyze operands of the call, assigning locations to each operand.
1810  SmallVector<CCValAssign, 16> ArgLocs;
1811  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1812  *DAG.getContext());
1813  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1814 
1815  // Get a count of how many bytes are to be pushed on the stack.
1816  unsigned NumBytes = CCInfo.getNextStackOffset();
1817 
1818  // For tail calls, memory operands are available in our caller's stack.
1819  if (isSibCall)
1820  NumBytes = 0;
1821 
1822  // Adjust the stack pointer for the new arguments...
1823  // These operations are automatically eliminated by the prolog/epilog pass
1824  if (!isSibCall)
1825  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1826 
1827  SDValue StackPtr =
1828  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1829 
1830  RegsToPassVector RegsToPass;
1831  SmallVector<SDValue, 8> MemOpChains;
1832 
1833  // Walk the register/memloc assignments, inserting copies/loads. In the case
1834  // of tail call optimization, arguments are handled later.
1835  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1836  i != e;
1837  ++i, ++realArgIdx) {
1838  CCValAssign &VA = ArgLocs[i];
1839  SDValue Arg = OutVals[realArgIdx];
1840  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1841  bool isByVal = Flags.isByVal();
1842 
1843  // Promote the value if needed.
1844  switch (VA.getLocInfo()) {
1845  default: llvm_unreachable("Unknown loc info!");
1846  case CCValAssign::Full: break;
1847  case CCValAssign::SExt:
1848  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1849  break;
1850  case CCValAssign::ZExt:
1851  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1852  break;
1853  case CCValAssign::AExt:
1854  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1855  break;
1856  case CCValAssign::BCvt:
1857  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1858  break;
1859  }
1860 
1861  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1862  if (VA.needsCustom()) {
1863  if (VA.getLocVT() == MVT::v2f64) {
1864  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1865  DAG.getConstant(0, dl, MVT::i32));
1866  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1867  DAG.getConstant(1, dl, MVT::i32));
1868 
1869  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1870  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1871 
1872  VA = ArgLocs[++i]; // skip ahead to next loc
1873  if (VA.isRegLoc()) {
1874  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1875  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1876  } else {
1877  assert(VA.isMemLoc());
1878 
1879  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1880  dl, DAG, VA, Flags));
1881  }
1882  } else {
1883  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1884  StackPtr, MemOpChains, Flags);
1885  }
1886  } else if (VA.isRegLoc()) {
1887  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1888  Outs[0].VT == MVT::i32) {
1889  assert(VA.getLocVT() == MVT::i32 &&
1890  "unexpected calling convention register assignment");
1891  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1892  "unexpected use of 'returned'");
1893  isThisReturn = true;
1894  }
1895  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1896  } else if (isByVal) {
1897  assert(VA.isMemLoc());
1898  unsigned offset = 0;
1899 
1900  // True if this byval aggregate will be split between registers
1901  // and memory.
1902  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1903  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1904 
1905  if (CurByValIdx < ByValArgsCount) {
1906 
1907  unsigned RegBegin, RegEnd;
1908  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1909 
1910  EVT PtrVT =
1911      DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1912  unsigned int i, j;
1913  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1914  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1915  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1916  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1918  DAG.InferPtrAlignment(AddArg));
1919  MemOpChains.push_back(Load.getValue(1));
1920  RegsToPass.push_back(std::make_pair(j, Load));
1921  }
1922 
1923  // If the parameter size exceeds the register area, the "offset" value
1924  // helps us calculate the stack slot for the remaining part properly.
1925  offset = RegEnd - RegBegin;
1926 
1927  CCInfo.nextInRegsParam();
1928  }
1929 
1930  if (Flags.getByValSize() > 4*offset) {
1931  auto PtrVT = getPointerTy(DAG.getDataLayout());
1932  unsigned LocMemOffset = VA.getLocMemOffset();
1933  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1934  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1935  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1936  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1937  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1938  MVT::i32);
1939  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1940  MVT::i32);
1941 
1942  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1943  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1944  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1945  Ops));
1946  }
1947  } else if (!isSibCall) {
1948  assert(VA.isMemLoc());
1949 
1950  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1951  dl, DAG, VA, Flags));
1952  }
1953  }
1954 
1955  if (!MemOpChains.empty())
1956  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1957 
1958  // Build a sequence of copy-to-reg nodes chained together with token chain
1959  // and flag operands which copy the outgoing args into the appropriate regs.
1960  SDValue InFlag;
1961  // Tail call byval lowering might overwrite argument registers so in case of
1962  // tail call optimization the copies to registers are lowered later.
1963  if (!isTailCall)
1964  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1965  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1966  RegsToPass[i].second, InFlag);
1967  InFlag = Chain.getValue(1);
1968  }
1969 
1970  // For tail calls lower the arguments to the 'real' stack slot.
1971  if (isTailCall) {
1972  // Force all the incoming stack arguments to be loaded from the stack
1973  // before any new outgoing arguments are stored to the stack, because the
1974  // outgoing stack slots may alias the incoming argument stack slots, and
1975  // the alias isn't otherwise explicit. This is slightly more conservative
1976  // than necessary, because it means that each store effectively depends
1977  // on every argument instead of just those arguments it would clobber.
1978 
1979  // Do not flag preceding copytoreg stuff together with the following stuff.
1980  InFlag = SDValue();
1981  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1982  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1983  RegsToPass[i].second, InFlag);
1984  InFlag = Chain.getValue(1);
1985  }
1986  InFlag = SDValue();
1987  }
1988 
1989  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1990  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1991  // node so that legalize doesn't hack it.
1992  bool isDirect = false;
1993 
1994  const TargetMachine &TM = getTargetMachine();
1995  const Module *Mod = MF.getFunction().getParent();
1996  const GlobalValue *GV = nullptr;
1997  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1998  GV = G->getGlobal();
1999  bool isStub =
2000  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2001 
2002  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2003  bool isLocalARMFunc = false;
2004  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2005  auto PtrVt = getPointerTy(DAG.getDataLayout());
2006 
2007  if (Subtarget->genLongCalls()) {
2008  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2009  "long-calls codegen is not position independent!");
2010  // Handle a global address or an external symbol. If it's not one of
2011  // those, the target's already in a register, so we don't need to do
2012  // anything extra.
2013  if (isa<GlobalAddressSDNode>(Callee)) {
2014  // Create a constant pool entry for the callee address
2015  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2016  ARMConstantPoolValue *CPV =
2017  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2018 
2019  // Get the address of the callee into a register
2020  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2021  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2022  Callee = DAG.getLoad(
2023  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2025  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2026  const char *Sym = S->getSymbol();
2027 
2028  // Create a constant pool entry for the callee address
2029  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2030  ARMConstantPoolValue *CPV =
2032  ARMPCLabelIndex, 0);
2033  // Get the address of the callee into a register
2034  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2035  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2036  Callee = DAG.getLoad(
2037  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2039  }
2040  } else if (isa<GlobalAddressSDNode>(Callee)) {
2041  // If we're optimizing for minimum size and the function is called three or
2042  // more times in this block, we can improve codesize by calling indirectly
2043  // as BLXr has a 16-bit encoding.
2044  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2045  auto *BB = CLI.CS.getParent();
2046  bool PreferIndirect =
2047  Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
2048  count_if(GV->users(), [&BB](const User *U) {
2049  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2050  }) > 2;
2051 
2052  if (!PreferIndirect) {
2053  isDirect = true;
2054  bool isDef = GV->isStrongDefinitionForLinker();
2055 
2056  // ARM call to a local ARM function is predicable.
2057  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2058  // tBX takes a register source operand.
2059  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2060  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2061  Callee = DAG.getNode(
2062  ARMISD::WrapperPIC, dl, PtrVt,
2063  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2064  Callee = DAG.getLoad(
2065  PtrVt, dl, DAG.getEntryNode(), Callee,
2067  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2069  } else if (Subtarget->isTargetCOFF()) {
2070  assert(Subtarget->isTargetWindows() &&
2071  "Windows is the only supported COFF target");
2072  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2075  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2076  TargetFlags);
2077  if (GV->hasDLLImportStorageClass())
2078  Callee =
2079  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2080  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2082  } else {
2083  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2084  }
2085  }
2086  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2087  isDirect = true;
2088  // tBX takes a register source operand.
2089  const char *Sym = S->getSymbol();
2090  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2091  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2092  ARMConstantPoolValue *CPV =
2094  ARMPCLabelIndex, 4);
2095  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2096  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2097  Callee = DAG.getLoad(
2098  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2100  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2101  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2102  } else {
2103  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2104  }
2105  }
2106 
2107  // FIXME: handle tail calls differently.
2108  unsigned CallOpc;
2109  if (Subtarget->isThumb()) {
2110  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2111  CallOpc = ARMISD::CALL_NOLINK;
2112  else
2113  CallOpc = ARMISD::CALL;
2114  } else {
2115  if (!isDirect && !Subtarget->hasV5TOps())
2116  CallOpc = ARMISD::CALL_NOLINK;
2117  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2118  // Emit regular call when code size is the priority
2119  !MF.getFunction().optForMinSize())
2120  // "mov lr, pc; b _foo" to avoid confusing the RSP
2121  CallOpc = ARMISD::CALL_NOLINK;
2122  else
2123  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2124  }
2125 
2126  std::vector<SDValue> Ops;
2127  Ops.push_back(Chain);
2128  Ops.push_back(Callee);
2129 
2130  // Add argument registers to the end of the list so that they are known live
2131  // into the call.
2132  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2133  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2134  RegsToPass[i].second.getValueType()));
2135 
2136  // Add a register mask operand representing the call-preserved registers.
2137  if (!isTailCall) {
2138  const uint32_t *Mask;
2139  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2140  if (isThisReturn) {
2141  // For 'this' returns, use the R0-preserving mask if applicable
2142  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2143  if (!Mask) {
2144  // Set isThisReturn to false if the calling convention is not one that
2145  // allows 'returned' to be modeled in this way, so LowerCallResult does
2146  // not try to pass 'this' straight through
2147  isThisReturn = false;
2148  Mask = ARI->getCallPreservedMask(MF, CallConv);
2149  }
2150  } else
2151  Mask = ARI->getCallPreservedMask(MF, CallConv);
2152 
2153  assert(Mask && "Missing call preserved mask for calling convention");
2154  Ops.push_back(DAG.getRegisterMask(Mask));
2155  }
2156 
2157  if (InFlag.getNode())
2158  Ops.push_back(InFlag);
2159 
2160  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2161  if (isTailCall) {
2163  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2164  }
2165 
2166  // Returns a chain and a flag for retval copy to use.
2167  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2168  InFlag = Chain.getValue(1);
2169 
2170  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2171  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2172  if (!Ins.empty())
2173  InFlag = Chain.getValue(1);
2174 
2175  // Handle result values, copying them out of physregs into vregs that we
2176  // return.
2177  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2178  InVals, isThisReturn,
2179  isThisReturn ? OutVals[0] : SDValue());
2180 }
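// For a rough illustration of the path above, a simple direct call such as
//   %r = call i32 @callee(i32 %a, i32 %b)
// has %a and %b copied into r0/r1, the argument registers plus a
// call-preserved register mask appended as operands, and an ARMISD::CALL
// (or ARMISD::TC_RETURN for a sibcall) node emitted; LowerCallResult then
// copies the result back out of r0. (Illustrative sketch only.)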
2181 
2182 /// HandleByVal - Every parameter *after* a byval parameter is passed
2183 /// on the stack. Remember the next parameter register to allocate,
2184 /// and then confiscate the rest of the parameter registers to ensure
2185 /// this.
2186 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2187  unsigned Align) const {
2188  // Byval (as with any stack) slots are always at least 4 byte aligned.
2189  Align = std::max(Align, 4U);
2190 
2191  unsigned Reg = State->AllocateReg(GPRArgRegs);
2192  if (!Reg)
2193  return;
2194 
2195  unsigned AlignInRegs = Align / 4;
2196  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2197  for (unsigned i = 0; i < Waste; ++i)
2198  Reg = State->AllocateReg(GPRArgRegs);
2199 
2200  if (!Reg)
2201  return;
2202 
2203  unsigned Excess = 4 * (ARM::R4 - Reg);
2204 
2205  // Special case when NSAA != SP and the parameter size is greater than the
2206  // size of all remaining GPR regs. In that case we can't split the parameter,
2207  // we must send it to the stack. We also must set the NCRN to R4, so we
2208  // waste all remaining registers.
2209  const unsigned NSAAOffset = State->getNextStackOffset();
2210  if (NSAAOffset != 0 && Size > Excess) {
2211  while (State->AllocateReg(GPRArgRegs))
2212  ;
2213  return;
2214  }
2215 
2216  // The first register for the byval parameter is the first register that
2217  // wasn't allocated before this method call, so it would be "reg".
2218  // If the parameter is small enough to be saved in the range [reg, r4), then
2219  // the end (first after last) register would be reg + param-size-in-regs;
2220  // otherwise the parameter is split between registers and the stack, and
2221  // the end register would be r4 in that case.
2222  unsigned ByValRegBegin = Reg;
2223  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2224  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2225  // Note: the first register was already allocated at the beginning of this
2226  // function; allocate the remaining registers we need.
2227  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2228  State->AllocateReg(GPRArgRegs);
2229  // A byval parameter that is split between registers and memory needs its
2230  // size truncated here.
2231  // In the case where the entire structure fits in registers, we set the
2232  // size in memory to zero.
2233  Size = std::max<int>(Size - Excess, 0);
2234 }
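// Worked example (hypothetical, for illustration): for
//   void f(int a, struct S s);   // sizeof(S) == 16, passed byval
// under AAPCS, 'a' takes r0 and the byval starts at r1, so Reg == r1 and
// Excess == 4 * (r4 - r1) == 12 bytes. The byval is recorded as occupying
// [r1, r4), and Size is truncated to 16 - 12 == 4 bytes, which go on the
// stack; had the whole struct fit in registers, Size would become 0.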
2235 
2236 /// MatchingStackOffset - Return true if the given stack call argument is
2237 /// already available in the same position (relatively) of the caller's
2238 /// incoming argument stack.
2239 static
2240 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2241  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2242  const TargetInstrInfo *TII) {
2243  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2244  int FI = std::numeric_limits<int>::max();
2245  if (Arg.getOpcode() == ISD::CopyFromReg) {
2246  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2247  if (!TargetRegisterInfo::isVirtualRegister(VR))
2248  return false;
2249  MachineInstr *Def = MRI->getVRegDef(VR);
2250  if (!Def)
2251  return false;
2252  if (!Flags.isByVal()) {
2253  if (!TII->isLoadFromStackSlot(*Def, FI))
2254  return false;
2255  } else {
2256  return false;
2257  }
2258  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2259  if (Flags.isByVal())
2260  // ByVal argument is passed in as a pointer but it's now being
2261  // dereferenced. e.g.
2262  // define @foo(%struct.X* %A) {
2263  // tail call @bar(%struct.X* byval %A)
2264  // }
2265  return false;
2266  SDValue Ptr = Ld->getBasePtr();
2267  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2268  if (!FINode)
2269  return false;
2270  FI = FINode->getIndex();
2271  } else
2272  return false;
2273 
2274  assert(FI != std::numeric_limits<int>::max());
2275  if (!MFI.isFixedObjectIndex(FI))
2276  return false;
2277  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2278 }
2279 
2280 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2281 /// for tail call optimization. Targets which want to do tail call
2282 /// optimization should implement this function.
2283 bool
2284 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2285  CallingConv::ID CalleeCC,
2286  bool isVarArg,
2287  bool isCalleeStructRet,
2288  bool isCallerStructRet,
2289  const SmallVectorImpl<ISD::OutputArg> &Outs,
2290  const SmallVectorImpl<SDValue> &OutVals,
2291  const SmallVectorImpl<ISD::InputArg> &Ins,
2292  SelectionDAG& DAG) const {
2293  MachineFunction &MF = DAG.getMachineFunction();
2294  const Function &CallerF = MF.getFunction();
2295  CallingConv::ID CallerCC = CallerF.getCallingConv();
2296 
2297  assert(Subtarget->supportsTailCall());
2298 
2299  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
2300  // to the call take up r0-r3. The reason is that there are no legal registers
2301  // left to hold the pointer to the function to be called.
2302  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2303  !isa<GlobalAddressSDNode>(Callee.getNode()))
2304  return false;
2305 
2306  // Look for obvious safe cases to perform tail call optimization that do not
2307  // require ABI changes. This is what gcc calls sibcall.
2308 
2309  // Exception-handling functions need a special set of instructions to indicate
2310  // a return to the hardware. Tail-calling another function would probably
2311  // break this.
2312  if (CallerF.hasFnAttribute("interrupt"))
2313  return false;
2314 
2315  // Also avoid sibcall optimization if either caller or callee uses struct
2316  // return semantics.
2317  if (isCalleeStructRet || isCallerStructRet)
2318  return false;
2319 
2320  // Externally-defined functions with weak linkage should not be
2321  // tail-called on ARM when the OS does not support dynamic
2322  // pre-emption of symbols, as the AAELF spec requires normal calls
2323  // to undefined weak functions to be replaced with a NOP or jump to the
2324  // next instruction. The behaviour of branch instructions in this
2325  // situation (as used for tail calls) is implementation-defined, so we
2326  // cannot rely on the linker replacing the tail call with a return.
2327  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2328  const GlobalValue *GV = G->getGlobal();
2329  const Triple &TT = getTargetMachine().getTargetTriple();
2330  if (GV->hasExternalWeakLinkage() &&
2331  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2332  return false;
2333  }
2334 
2335  // Check that the call results are passed in the same way.
2336  LLVMContext &C = *DAG.getContext();
2337  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2338  CCAssignFnForReturn(CalleeCC, isVarArg),
2339  CCAssignFnForReturn(CallerCC, isVarArg)))
2340  return false;
2341  // The callee has to preserve all registers the caller needs to preserve.
2342  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2343  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2344  if (CalleeCC != CallerCC) {
2345  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2346  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2347  return false;
2348  }
2349 
2350  // If Caller's vararg or byval argument has been split between registers and
2351  // stack, do not perform tail call, since part of the argument is in caller's
2352  // local frame.
2353  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2354  if (AFI_Caller->getArgRegsSaveSize())
2355  return false;
2356 
2357  // If the callee takes no arguments then go on to check the results of the
2358  // call.
2359  if (!Outs.empty()) {
2360  // Check if stack adjustment is needed. For now, do not do this if any
2361  // argument is passed on the stack.
2362  SmallVector<CCValAssign, 16> ArgLocs;
2363  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2364  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2365  if (CCInfo.getNextStackOffset()) {
2366  // Check if the arguments are already laid out in the right way as
2367  // the caller's fixed stack objects.
2368  MachineFrameInfo &MFI = MF.getFrameInfo();
2369  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2370  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2371  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2372  i != e;
2373  ++i, ++realArgIdx) {
2374  CCValAssign &VA = ArgLocs[i];
2375  EVT RegVT = VA.getLocVT();
2376  SDValue Arg = OutVals[realArgIdx];
2377  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2378  if (VA.getLocInfo() == CCValAssign::Indirect)
2379  return false;
2380  if (VA.needsCustom()) {
2381  // f64 and vector types are split into multiple registers or
2382  // register/stack-slot combinations. The types will not match
2383  // the registers; give up on memory f64 refs until we figure
2384  // out what to do about this.
2385  if (!VA.isRegLoc())
2386  return false;
2387  if (!ArgLocs[++i].isRegLoc())
2388  return false;
2389  if (RegVT == MVT::v2f64) {
2390  if (!ArgLocs[++i].isRegLoc())
2391  return false;
2392  if (!ArgLocs[++i].isRegLoc())
2393  return false;
2394  }
2395  } else if (!VA.isRegLoc()) {
2396  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2397  MFI, MRI, TII))
2398  return false;
2399  }
2400  }
2401  }
2402 
2403  const MachineRegisterInfo &MRI = MF.getRegInfo();
2404  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2405  return false;
2406  }
2407 
2408  return true;
2409 }
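// Illustrative example (assumed, not taken from a test): a call such as
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
// passes every check above (no struct return, no stack-passed arguments,
// compatible conventions) and is emitted as a sibcall, whereas a caller with
// the "interrupt" attribute, or stack arguments laid out differently from the
// caller's fixed objects, falls back to a normal call.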
2410 
2411 bool
2412 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2413  MachineFunction &MF, bool isVarArg,
2414  const SmallVectorImpl<ISD::OutputArg> &Outs,
2415  LLVMContext &Context) const {
2416  SmallVector<CCValAssign, 16> RVLocs;
2417  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2418  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2419 }
2420 
2421 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2422  const SDLoc &DL, SelectionDAG &DAG) {
2423  const MachineFunction &MF = DAG.getMachineFunction();
2424  const Function &F = MF.getFunction();
2425 
2426  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2427 
2428  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2429  // version of the "preferred return address". These offsets affect the return
2430  // instruction if this is a return from PL1 without hypervisor extensions.
2431  // IRQ/FIQ: +4 "subs pc, lr, #4"
2432  // SWI: 0 "subs pc, lr, #0"
2433  // ABORT: +4 "subs pc, lr, #4"
2434  // UNDEF: +4/+2 "subs pc, lr, #0"
2435  // UNDEF varies depending on whether the exception came from ARM or Thumb
2436  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2437 
2438  int64_t LROffset;
2439  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2440  IntKind == "ABORT")
2441  LROffset = 4;
2442  else if (IntKind == "SWI" || IntKind == "UNDEF")
2443  LROffset = 0;
2444  else
2445  report_fatal_error("Unsupported interrupt attribute. If present, value "
2446  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2447 
2448  RetOps.insert(RetOps.begin() + 1,
2449  DAG.getConstant(LROffset, DL, MVT::i32, false));
2450 
2451  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2452 }
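// For illustration (hypothetical example): a handler such as
//   define void @irq() "interrupt"="IRQ" { ... ret void }
// returns with "subs pc, lr, #4" (LROffset == 4), while "interrupt"="SWI"
// returns with "subs pc, lr, #0"; any other attribute value hits the
// report_fatal_error above.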
2453 
2454 SDValue
2455 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2456  bool isVarArg,
2457  const SmallVectorImpl<ISD::OutputArg> &Outs,
2458  const SmallVectorImpl<SDValue> &OutVals,
2459  const SDLoc &dl, SelectionDAG &DAG) const {
2460  // CCValAssign - represent the assignment of the return value to a location.
2461  SmallVector<CCValAssign, 16> RVLocs;
2462 
2463  // CCState - Info about the registers and stack slots.
2464  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2465  *DAG.getContext());
2466 
2467  // Analyze outgoing return values.
2468  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2469 
2470  SDValue Flag;
2471  SmallVector<SDValue, 4> RetOps;
2472  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2473  bool isLittleEndian = Subtarget->isLittle();
2474 
2475  MachineFunction &MF = DAG.getMachineFunction();
2476  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2477  AFI->setReturnRegsCount(RVLocs.size());
2478 
2479  // Copy the result values into the output registers.
2480  for (unsigned i = 0, realRVLocIdx = 0;
2481  i != RVLocs.size();
2482  ++i, ++realRVLocIdx) {
2483  CCValAssign &VA = RVLocs[i];
2484  assert(VA.isRegLoc() && "Can only return in registers!");
2485 
2486  SDValue Arg = OutVals[realRVLocIdx];
2487  bool ReturnF16 = false;
2488 
2489  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2490  // Half-precision return values can be returned like this:
2491  //
2492  // t11: f16 = fadd ...
2493  // t12: i16 = bitcast t11
2494  // t13: i32 = zero_extend t12
2495  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2496  //
2497  // to avoid code generation for bitcasts, we simply set Arg to the node
2498  // that produces the f16 value, t11 in this case.
2499  //
2500  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2501  SDValue ZE = Arg.getOperand(0);
2502  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2503  SDValue BC = ZE.getOperand(0);
2504  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2505  Arg = BC.getOperand(0);
2506  ReturnF16 = true;
2507  }
2508  }
2509  }
2510  }
2511 
2512  switch (VA.getLocInfo()) {
2513  default: llvm_unreachable("Unknown loc info!");
2514  case CCValAssign::Full: break;
2515  case CCValAssign::BCvt:
2516  if (!ReturnF16)
2517  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2518  break;
2519  }
2520 
2521  if (VA.needsCustom()) {
2522  if (VA.getLocVT() == MVT::v2f64) {
2523  // Extract the first half and return it in two registers.
2524  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2525  DAG.getConstant(0, dl, MVT::i32));
2526  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2527  DAG.getVTList(MVT::i32, MVT::i32), Half);
2528 
2529  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2530  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2531  Flag);
2532  Flag = Chain.getValue(1);
2533  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2534  VA = RVLocs[++i]; // skip ahead to next loc
2535  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2536  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2537  Flag);
2538  Flag = Chain.getValue(1);
2539  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2540  VA = RVLocs[++i]; // skip ahead to next loc
2541 
2542  // Extract the 2nd half and fall through to handle it as an f64 value.
2543  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2544  DAG.getConstant(1, dl, MVT::i32));
2545  }
2546  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2547  // available.
2548  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2549  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2550  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2551  fmrrd.getValue(isLittleEndian ? 0 : 1),
2552  Flag);
2553  Flag = Chain.getValue(1);
2554  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2555  VA = RVLocs[++i]; // skip ahead to next loc
2556  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2557  fmrrd.getValue(isLittleEndian ? 1 : 0),
2558  Flag);
2559  } else
2560  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2561 
2562  // Guarantee that all emitted copies are glued together,
2563  // so they are not scheduled apart from the return.
2564  Flag = Chain.getValue(1);
2565  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2566  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2567  }
2568  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2569  const MCPhysReg *I =
2570      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction().getFunction());
2571  if (I) {
2572  for (; *I; ++I) {
2573  if (ARM::GPRRegClass.contains(*I))
2574  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2575  else if (ARM::DPRRegClass.contains(*I))
2576  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2577  else
2578  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2579  }
2580  }
2581 
2582  // Update chain and glue.
2583  RetOps[0] = Chain;
2584  if (Flag.getNode())
2585  RetOps.push_back(Flag);
2586 
2587  // CPUs which aren't M-class use a special sequence to return from
2588  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2589  // though we use "subs pc, lr, #N").
2590  //
2591  // M-class CPUs actually use a normal return sequence with a special
2592  // (hardware-provided) value in LR, so the normal code path works.
2593  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2594  !Subtarget->isMClass()) {
2595  if (Subtarget->isThumb1Only())
2596  report_fatal_error("interrupt attribute is not supported in Thumb1");
2597  return LowerInterruptReturn(RetOps, dl, DAG);
2598  }
2599 
2600  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2601 }
2602 
2603 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2604  if (N->getNumValues() != 1)
2605  return false;
2606  if (!N->hasNUsesOfValue(1, 0))
2607  return false;
2608 
2609  SDValue TCChain = Chain;
2610  SDNode *Copy = *N->use_begin();
2611  if (Copy->getOpcode() == ISD::CopyToReg) {
2612  // If the copy has a glue operand, we conservatively assume it isn't safe to
2613  // perform a tail call.
2614  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2615  return false;
2616  TCChain = Copy->getOperand(0);
2617  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2618  SDNode *VMov = Copy;
2619  // f64 returned in a pair of GPRs.
2620  SmallPtrSet<SDNode*, 2> Copies;
2621  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2622  UI != UE; ++UI) {
2623  if (UI->getOpcode() != ISD::CopyToReg)
2624  return false;
2625  Copies.insert(*UI);
2626  }
2627  if (Copies.size() > 2)
2628  return false;
2629 
2630  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2631  UI != UE; ++UI) {
2632  SDValue UseChain = UI->getOperand(0);
2633  if (Copies.count(UseChain.getNode()))
2634  // Second CopyToReg
2635  Copy = *UI;
2636  else {
2637  // We are at the top of this chain.
2638  // If the copy has a glue operand, we conservatively assume it
2639  // isn't safe to perform a tail call.
2640  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2641  return false;
2642  // First CopyToReg
2643  TCChain = UseChain;
2644  }
2645  }
2646  } else if (Copy->getOpcode() == ISD::BITCAST) {
2647  // f32 returned in a single GPR.
2648  if (!Copy->hasOneUse())
2649  return false;
2650  Copy = *Copy->use_begin();
2651  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2652  return false;
2653  // If the copy has a glue operand, we conservatively assume it isn't safe to
2654  // perform a tail call.
2655  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2656  return false;
2657  TCChain = Copy->getOperand(0);
2658  } else {
2659  return false;
2660  }
2661 
2662  bool HasRet = false;
2663  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2664  UI != UE; ++UI) {
2665  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2666  UI->getOpcode() != ARMISD::INTRET_FLAG)
2667  return false;
2668  HasRet = true;
2669  }
2670 
2671  if (!HasRet)
2672  return false;
2673 
2674  Chain = TCChain;
2675  return true;
2676 }
2677 
2678 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2679  if (!Subtarget->supportsTailCall())
2680  return false;
2681 
2682  auto Attr =
2683  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2684  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2685  return false;
2686 
2687  return true;
2688 }
2689 
2690 // We are trying to write a 64-bit value, so we need to split it into two
2691 // 32-bit values first and pass the low and high parts through.
2692 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2693  SDLoc DL(Op);
2694  SDValue WriteValue = Op->getOperand(2);
2695 
2696  // This function is only supposed to be called for i64 type argument.
2697  assert(WriteValue.getValueType() == MVT::i64
2698  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2699 
2700  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2701  DAG.getConstant(0, DL, MVT::i32));
2702  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2703  DAG.getConstant(1, DL, MVT::i32));
2704  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2705  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2706 }
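// Sketch of the input this handles (the register name is a placeholder, not a
// claim about which 64-bit registers are writable):
//   call void @llvm.write_register.i64(metadata !{!"some_reg"}, i64 %v)
// The i64 value is split into Lo/Hi i32 halves via ISD::EXTRACT_ELEMENT and
// re-emitted as one ISD::WRITE_REGISTER node carrying both halves.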
2707 
2708 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2709 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2710 // one of the above mentioned nodes. It has to be wrapped because otherwise
2711 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2712 // be used to form addressing mode. These wrapped nodes will be selected
2713 // into MOVi.
2714 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2715  SelectionDAG &DAG) const {
2716  EVT PtrVT = Op.getValueType();
2717  // FIXME there is no actual debug info here
2718  SDLoc dl(Op);
2719  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2720  SDValue Res;
2721 
2722  // When generating execute-only code Constant Pools must be promoted to the
2723  // global data section. It's a bit ugly that we can't share them across basic
2724  // blocks, but this way we guarantee that execute-only behaves correctly with
2725  // position-independent addressing modes.
2726  if (Subtarget->genExecuteOnly()) {
2727  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2728  auto T = const_cast<Type*>(CP->getType());
2729  auto C = const_cast<Constant*>(CP->getConstVal());
2730  auto M = const_cast<Module*>(DAG.getMachineFunction().
2731  getFunction().getParent());
2732  auto GV = new GlobalVariable(
2733  *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2734  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2735  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2736  Twine(AFI->createPICLabelUId())
2737  );
2738  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2739  dl, PtrVT);
2740  return LowerGlobalAddress(GA, DAG);
2741  }
2742 
2743  if (CP->isMachineConstantPoolEntry())
2744  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2745  CP->getAlignment());
2746  else
2747  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2748  CP->getAlignment());
2749  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2750 }
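// Concretely (illustrative only), the wrapped node typically ends up as a
// PC-relative literal load such as
//   ldr r0, .LCPI0_0
// while in execute-only mode the constant instead becomes an internal global
// (named roughly "<prefix>CP<fn>_<uid>" as built above) and is addressed via
// LowerGlobalAddress.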
2751 
2752 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2753  return MachineJumpTableInfo::EK_Inline;
2754 }
2755 
2756 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2757  SelectionDAG &DAG) const {
2758  MachineFunction &MF = DAG.getMachineFunction();
2759  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2760  unsigned ARMPCLabelIndex = 0;
2761  SDLoc DL(Op);
2762  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2763  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2764  SDValue CPAddr;
2765  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2766  if (!IsPositionIndependent) {
2767  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2768  } else {
2769  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2770  ARMPCLabelIndex = AFI->createPICLabelUId();
2771  ARMConstantPoolValue *CPV =
2772  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2773  ARMCP::CPBlockAddress, PCAdj);
2774  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2775  }
2776  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2777  SDValue Result = DAG.getLoad(
2778  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2780  if (!IsPositionIndependent)
2781  return Result;
2782  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2783  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2784 }
2785 
2786 /// \brief Convert a TLS address reference into the correct sequence of loads
2787 /// and calls to compute the variable's address for Darwin, and return an
2788 /// SDValue containing the final node.
2789 
2790 /// Darwin only has one TLS scheme which must be capable of dealing with the
2791 /// fully general situation, in the worst case. This means:
2792 /// + "extern __thread" declaration.
2793 /// + Defined in a possibly unknown dynamic library.
2794 ///
2795 /// The general system is that each __thread variable has a [3 x i32] descriptor
2796 /// which contains information used by the runtime to calculate the address. The
2797 /// only part of this the compiler needs to know about is the first word, which
2798 /// contains a function pointer that must be called with the address of the
2799 /// entire descriptor in "r0".
2800 ///
2801 /// Since this descriptor may be in a different unit, in general access must
2802 /// proceed along the usual ARM rules. A common sequence to produce is:
2803 ///
2804 /// movw rT1, :lower16:_var$non_lazy_ptr
2805 /// movt rT1, :upper16:_var$non_lazy_ptr
2806 /// ldr r0, [rT1]
2807 /// ldr rT2, [r0]
2808 /// blx rT2
2809 /// [...address now in r0...]
2810 SDValue
2811 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2812  SelectionDAG &DAG) const {
2813  assert(Subtarget->isTargetDarwin() &&
2814  "This function expects a Darwin target");
2815  SDLoc DL(Op);
2816 
2817  // The first step is to get the address of the actual global symbol. This is where
2818  // the TLS descriptor lives.
2819  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2820 
2821  // The first entry in the descriptor is a function pointer that we must call
2822  // to obtain the address of the variable.
2823  SDValue Chain = DAG.getEntryNode();
2824  SDValue FuncTLVGet = DAG.getLoad(
2825  MVT::i32, DL, Chain, DescAddr,
2827  /* Alignment = */ 4,
2830  Chain = FuncTLVGet.getValue(1);
2831 
2832  MachineFunction &F = DAG.getMachineFunction();
2833  MachineFrameInfo &MFI = F.getFrameInfo();
2834  MFI.setAdjustsStack(true);
2835 
2836  // TLS calls preserve all registers except those that absolutely must be
2837  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2838  // silly).
2839  auto TRI =
2840  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
2841  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2842  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2843 
2844  // Finally, we can make the call. This is just a degenerate version of a
2845  // normal ARM call node: r0 takes the address of the descriptor, and
2846  // returns the address of the variable in this thread.
2847  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2848  Chain =
2849      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2850                  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2851  DAG.getRegisterMask(Mask), Chain.getValue(1));
2852  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2853 }
2854 
2855 SDValue
2856 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2857  SelectionDAG &DAG) const {
2858  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2859 
2860  SDValue Chain = DAG.getEntryNode();
2861  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2862  SDLoc DL(Op);
2863 
2864  // Load the current TEB (thread environment block)
2865  SDValue Ops[] = {Chain,
2866  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2867  DAG.getConstant(15, DL, MVT::i32),
2868  DAG.getConstant(0, DL, MVT::i32),
2869  DAG.getConstant(13, DL, MVT::i32),
2870  DAG.getConstant(0, DL, MVT::i32),
2871  DAG.getConstant(2, DL, MVT::i32)};
2872  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2873  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2874 
2875  SDValue TEB = CurrentTEB.getValue(0);
2876  Chain = CurrentTEB.getValue(1);
2877 
2878  // Load the ThreadLocalStoragePointer from the TEB
2879  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2880  SDValue TLSArray =
2881  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2882  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2883 
2884  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2885  // offset into the TLSArray.
2886 
2887  // Load the TLS index from the C runtime
2888  SDValue TLSIndex =
2889  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2890  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2891  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2892 
2893  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2894  DAG.getConstant(2, DL, MVT::i32));
2895  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2896  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2897  MachinePointerInfo());
2898 
2899  // Get the offset of the start of the .tls section (section base)
2900  const auto *GA = cast<GlobalAddressSDNode>(Op);
2901  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2902  SDValue Offset = DAG.getLoad(
2903  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2904  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2906 
2907  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2908 }
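// The computation above corresponds roughly to the following pseudo-C
// (illustrative; "var" stands for the TLS variable being lowered):
//   char  *teb       = <mrc p15, 0, rX, c13, c0, 2>;  // current TEB
//   char **tls_array = *(char ***)(teb + 0x2c);
//   char  *tls_base  = tls_array[_tls_index];
//   void  *addr      = tls_base + <SECREL offset of var>;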
2909 
2910 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2911 SDValue
2912 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2913  SelectionDAG &DAG) const {
2914  SDLoc dl(GA);
2915  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2916  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2917  MachineFunction &MF = DAG.getMachineFunction();
2918  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2919  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2920  ARMConstantPoolValue *CPV =
2921  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2922  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2923  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2924  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2925  Argument = DAG.getLoad(
2926  PtrVT, dl, DAG.getEntryNode(), Argument,
2928  SDValue Chain = Argument.getValue(1);
2929 
2930  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2931  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2932 
2933  // call __tls_get_addr.
2934  ArgListTy Args;
2935  ArgListEntry Entry;
2936  Entry.Node = Argument;
2937  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2938  Args.push_back(Entry);
2939 
2940  // FIXME: is there useful debug info available here?
2941  TargetLowering::CallLoweringInfo CLI(DAG);
2942  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2943      CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2944  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2945 
2946  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2947  return CallResult.first;
2948 }
2949 
2950 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2951 // "local exec" model.
2952 SDValue
2953 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2954  SelectionDAG &DAG,
2955  TLSModel::Model model) const {
2956  const GlobalValue *GV = GA->getGlobal();
2957  SDLoc dl(GA);
2958  SDValue Offset;
2959  SDValue Chain = DAG.getEntryNode();
2960  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2961  // Get the Thread Pointer
2962  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2963 
2964  if (model == TLSModel::InitialExec) {
2965  MachineFunction &MF = DAG.getMachineFunction();
2966  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2967  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2968  // Initial exec model.
2969  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2970  ARMConstantPoolValue *CPV =
2971  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2972      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2973  true);
2974  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2975  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2976  Offset = DAG.getLoad(
2977  PtrVT, dl, Chain, Offset,
2979  Chain = Offset.getValue(1);
2980 
2981  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2982  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2983 
2984  Offset = DAG.getLoad(
2985  PtrVT, dl, Chain, Offset,
2987  } else {
2988  // local exec model
2989  assert(model == TLSModel::LocalExec);
2990  ARMConstantPoolValue *CPV =
2991      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2992  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2993  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2994  Offset = DAG.getLoad(
2995  PtrVT, dl, Chain, Offset,
2997  }
2998 
2999  // The address of the thread local variable is the add of the thread
3000  // pointer with the offset of the variable.
3001  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3002 }
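// Roughly (illustrative assembly, not from the sources): initial-exec loads
// the variable's TPOFF through a GOT-indirect literal (the two loads above),
// while local-exec loads the TPOFF constant directly; either way the result is
//   address = <thread pointer> + offset
// e.g. for local-exec:
//   ldr r1, .LCPI_tpoff        @ .long var(TPOFF)
//   <read thread pointer into r0 via ARMISD::THREAD_POINTER>
//   add r0, r0, r1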
3003 
3004 SDValue
3005 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3006  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3007  if (DAG.getTarget().Options.EmulatedTLS)
3008  return LowerToTLSEmulatedModel(GA, DAG);
3009 
3010  if (Subtarget->isTargetDarwin())
3011  return LowerGlobalTLSAddressDarwin(Op, DAG);
3012 
3013  if (Subtarget->isTargetWindows())
3014  return LowerGlobalTLSAddressWindows(Op, DAG);
3015 
3016  // TODO: implement the "local dynamic" model
3017  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3018  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3019 
3020  switch (model) {
3021  case TLSModel::GeneralDynamic:
3022  case TLSModel::LocalDynamic:
3023  return LowerToTLSGeneralDynamicModel(GA, DAG);
3024  case TLSModel::InitialExec:
3025  case TLSModel::LocalExec:
3026  return LowerToTLSExecModels(GA, DAG, model);
3027  }
3028  llvm_unreachable("bogus TLS model");
3029 }
3030 
3031 /// Return true if all users of V are within function F, looking through
3032 /// ConstantExprs.
3033 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3034  SmallVector<const User*,4> Worklist;
3035  for (auto *U : V->users())
3036  Worklist.push_back(U);
3037  while (!Worklist.empty()) {
3038  auto *U = Worklist.pop_back_val();
3039  if (isa<ConstantExpr>(U)) {
3040  for (auto *UU : U->users())
3041  Worklist.push_back(UU);
3042  continue;
3043  }
3044 
3045  auto *I = dyn_cast<Instruction>(U);
3046  if (!I || I->getParent()->getParent() != F)
3047  return false;
3048  }
3049  return true;
3050 }
3051 
3052 /// Return true if all users of V are within some (any) function, looking through
3053 /// ConstantExprs. In other words, are there any global constant users?
3054 static bool allUsersAreInFunctions(const Value *V) {
3055  SmallVector<const User*,4> Worklist;
3056  for (auto *U : V->users())
3057  Worklist.push_back(U);
3058  while (!Worklist.empty()) {
3059  auto *U = Worklist.pop_back_val();
3060  if (isa<ConstantExpr>(U)) {
3061  for (auto *UU : U->users())
3062  Worklist.push_back(UU);
3063  continue;
3064  }
3065 
3066  if (!isa<Instruction>(U))
3067  return false;
3068  }
3069  return true;
3070 }
3071 
3072 // Return true if T is an integer, float or an array/vector of either.
3073 static bool isSimpleType(Type *T) {
3074  if (T->isIntegerTy() || T->isFloatingPointTy())
3075  return true;
3076  Type *SubT = nullptr;
3077  if (T->isArrayTy())
3078  SubT = T->getArrayElementType();
3079  else if (T->isVectorTy())
3080  SubT = T->getVectorElementType();
3081  else
3082  return false;
3083  return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3084 }
3085 
3086 static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3087  EVT PtrVT, const SDLoc &dl) {
3088  // If we're creating a pool entry for a constant global with unnamed address,
3089  // and the global is small enough, we can emit it inline into the constant pool
3090  // to save ourselves an indirection.
3091  //
3092  // This is a win if the constant is only used in one function (so it doesn't
3093  // need to be duplicated) or duplicating the constant wouldn't increase code
3094  // size (implying the constant is no larger than 4 bytes).
3095  const Function &F = DAG.getMachineFunction().getFunction();
3096 
3097  // We rely on this decision to inline being idempotent and unrelated to the
3098  // use-site. We know that if we inline a variable at one use site, we'll
3099  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3100  // doesn't know about this optimization, so bail out if it's enabled;
3101  // otherwise we could decide to inline here (and thus never emit the GV) while
3102  // fast-isel generated code would still require the GV.
3103  if (!EnableConstpoolPromotion ||
3104      DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3105  return SDValue();
3106 
3107  auto *GVar = dyn_cast<GlobalVariable>(GV);
3108  if (!GVar || !GVar->hasInitializer() ||
3109  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3110  !GVar->hasLocalLinkage())
3111  return SDValue();
3112 
3113  // Ensure that we don't try and inline any type that contains pointers. If
3114  // we inline a value that contains relocations, we move the relocations from
3115  // .data to .text which is not ideal.
3116  auto *Init = GVar->getInitializer();
3117  if (!isSimpleType(Init->getType()))
3118  return SDValue();
3119 
3120  // The constant islands pass can only really deal with alignment requests
3121  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3122  // any type wanting greater alignment requirements than 4 bytes. We also
3123  // can only promote constants that are multiples of 4 bytes in size or
3124  // are paddable to a multiple of 4. Currently we only try and pad constants
3125  // that are strings for simplicity.
3126  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3127  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3128  unsigned Align = GVar->getAlignment();
3129  unsigned RequiredPadding = 4 - (Size % 4);
3130  bool PaddingPossible =
3131  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3132  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3133  Size == 0)
3134  return SDValue();
3135 
3136  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3137  MachineFunction &MF = DAG.getMachineFunction();
3138  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3139 
3140  // We can't bloat the constant pool too much, else the ConstantIslands pass
3141  // may fail to converge. If we haven't promoted this global yet (it may have
3142  // multiple uses), and promoting it would increase the constant pool size (Sz
3143  // > 4), ensure we have space to do so up to MaxTotal.
3144  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3145  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3146      ConstpoolPromotionMaxTotal)
3147  return SDValue();
3148 
3149  // This is only valid if all users are in a single function OR it has users
3150  // in multiple functions but it is no larger than a pointer. We also check if
3151  // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3152  // address taken.
3153  if (!allUsersAreInFunction(GVar, &F) &&
3154  !(Size <= 4 && allUsersAreInFunctions(GVar)))
3155  return SDValue();
3156 
3157  // We're going to inline this global. Pad it out if needed.
3158  if (RequiredPadding != 4) {
3159  StringRef S = CDAInit->getAsString();
3160 
3161  SmallVector<uint8_t,16> V(S.size());
3162  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3163  while (RequiredPadding--)
3164  V.push_back(0);
3165  Init = ConstantDataArray::get(*DAG.getContext(), V);
3166  }
3167 
3168  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3169  SDValue CPAddr =
3170  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3171  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3172  AFI->markGlobalAsPromotedToConstantPool(GVar);
3173  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3174  PaddedSize - 4);
3175  }
3176  ++NumConstpoolPromoted;
3177  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3178 }
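// An illustrative candidate for this promotion (assumed example) is a small,
// local, constant, unnamed_addr string:
//   @.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
// With promotion enabled, the initializer is padded to a multiple of 4 bytes
// and emitted directly into the function's constant pool, so users reference
// the data PC-relative instead of indirecting through .rodata/.data.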
3179 
3180 static bool isReadOnly(const GlobalValue *GV) {
3181  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3182  GV = GA->getBaseObject();
3183  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3184  isa<Function>(GV);
3185 }
3186 
3187 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3188  SelectionDAG &DAG) const {
3189  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3190  default: llvm_unreachable("unknown object format");
3191  case Triple::COFF:
3192  return LowerGlobalAddressWindows(Op, DAG);
3193  case Triple::ELF:
3194  return LowerGlobalAddressELF(Op, DAG);
3195  case Triple::MachO:
3196  return LowerGlobalAddressDarwin(Op, DAG);
3197  }
3198 }
3199 
3200 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3201  SelectionDAG &DAG) const {
3202  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3203  SDLoc dl(Op);
3204  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3205  const TargetMachine &TM = getTargetMachine();
3206  bool IsRO = isReadOnly(GV);
3207 
3208  // promoteToConstantPool only if not generating XO text section
3209  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3210  if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3211  return V;
3212 
3213  if (isPositionIndependent()) {
3214  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3215  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3216  UseGOT_PREL ? ARMII::MO_GOT : 0);
3217  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3218  if (UseGOT_PREL)
3219  Result =
3220  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3222  return Result;
3223  } else if (Subtarget->isROPI() && IsRO) {
3224  // PC-relative.
3225  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3226  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3227  return Result;
3228  } else if (Subtarget->isRWPI() && !IsRO) {
3229  // SB-relative.
3230  SDValue RelAddr;
3231  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3232  ++NumMovwMovt;
3233  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3234  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3235  } else { // use literal pool for address constant
3236  ARMConstantPoolValue *CPV =
3237      ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3238  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3239  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3240  RelAddr = DAG.getLoad(
3241  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3243  }
3244  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3245  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3246  return Result;
3247  }
3248 
3249  // If we have T2 ops, we can materialize the address directly via movt/movw
3250  // pair. This is always cheaper.
3251  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3252  ++NumMovwMovt;
3253  // FIXME: Once remat is capable of dealing with instructions with register
3254  // operands, expand this into two nodes.
3255  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3256  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3257  } else {
3258  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3259  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3260  return DAG.getLoad(
3261  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3263  }
3264 }
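// On a subtarget with movw/movt the non-PIC path above materializes the
// address roughly as
//   movw r0, :lower16:var
//   movt r0, :upper16:var
// and without movt it falls back to a literal-pool load
//   ldr  r0, .LCPI0_0          @ .long var
// (illustrative assembly only).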
3265 
3266 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3267  SelectionDAG &DAG) const {
3268  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3269  "ROPI/RWPI not currently supported for Darwin");
3270  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3271  SDLoc dl(Op);
3272  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3273 
3274  if (Subtarget->useMovt(DAG.getMachineFunction()))
3275  ++NumMovwMovt;
3276 
3277  // FIXME: Once remat is capable of dealing with instructions with register
3278  // operands, expand this into multiple nodes
3279  unsigned Wrapper =
3280      isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3281 
3282  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3283  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3284 
3285  if (Subtarget->isGVIndirectSymbol(GV))
3286  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3288  return Result;
3289 }
3290 
3291 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3292  SelectionDAG &DAG) const {
3293  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3294  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3295  "Windows on ARM expects to use movw/movt");
3296  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3297  "ROPI/RWPI not currently supported for Windows");
3298 
3299  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3300  const ARMII::TOF TargetFlags =
3301  (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3302  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3303  SDValue Result;
3304  SDLoc DL(Op);
3305 
3306  ++NumMovwMovt;
3307 
3308  // FIXME: Once remat is capable of dealing with instructions with register
3309  // operands, expand this into two nodes.
3310  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3311  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3312  TargetFlags));
3313  if (GV->hasDLLImportStorageClass())
3314  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3315  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3316  return Result;
3317 }
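// Editorial sketch (not part of the upstream source): for a hypothetical
// dllimport global 'extern __declspec(dllimport) int g;', the wrapper above
// addresses the import-table slot and the extra load fetches the variable's
// real address from it, roughly:
//   movw r0, :lower16:__imp_g
//   movt r0, :upper16:__imp_g
//   ldr  r0, [r0]         @ now r0 holds &g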
3318 
3319 SDValue
3320 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3321  SDLoc dl(Op);
3322  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3323  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3324  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3325  Op.getOperand(1), Val);
3326 }
3327 
3328 SDValue
3329 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3330  SDLoc dl(Op);
3331  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3332  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3333 }
3334 
3335 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3336  SelectionDAG &DAG) const {
3337  SDLoc dl(Op);
3338  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3339  Op.getOperand(0));
3340 }
3341 
3342 SDValue
3343 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3344  const ARMSubtarget *Subtarget) const {
3345  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3346  SDLoc dl(Op);
3347  switch (IntNo) {
3348  default: return SDValue(); // Don't custom lower most intrinsics.
3349  case Intrinsic::thread_pointer: {
3350  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3351  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3352  }
3353  case Intrinsic::eh_sjlj_lsda: {
3354  MachineFunction &MF = DAG.getMachineFunction();
3355  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3356  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3357  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3358  SDValue CPAddr;
3359  bool IsPositionIndependent = isPositionIndependent();
3360  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3361  ARMConstantPoolValue *CPV =
3362  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3363  ARMCP::CPLSDA, PCAdj);
3364  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3365  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3366  SDValue Result = DAG.getLoad(
3367  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3368  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3369 
3370  if (IsPositionIndependent) {
3371  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3372  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3373  }
3374  return Result;
3375  }
3376  case Intrinsic::arm_neon_vabs:
3377  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3378  Op.getOperand(1));
3379  case Intrinsic::arm_neon_vmulls:
3380  case Intrinsic::arm_neon_vmullu: {
3381  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3382  ? ARMISD::VMULLs : ARMISD::VMULLu;
3383  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3384  Op.getOperand(1), Op.getOperand(2));
3385  }
3386  case Intrinsic::arm_neon_vminnm:
3387  case Intrinsic::arm_neon_vmaxnm: {
3388  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3389  ? ISD::FMINNUM : ISD::FMAXNUM;
3390  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3391  Op.getOperand(1), Op.getOperand(2));
3392  }
3393  case Intrinsic::arm_neon_vminu:
3394  case Intrinsic::arm_neon_vmaxu: {
3395  if (Op.getValueType().isFloatingPoint())
3396  return SDValue();
3397  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3398  ? ISD::UMIN : ISD::UMAX;
3399  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3400  Op.getOperand(1), Op.getOperand(2));
3401  }
3402  case Intrinsic::arm_neon_vmins:
3403  case Intrinsic::arm_neon_vmaxs: {
3404  // v{min,max}s is overloaded between signed integers and floats.
3405  if (!Op.getValueType().isFloatingPoint()) {
3406  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3407  ? ISD::SMIN : ISD::SMAX;
3408  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3409  Op.getOperand(1), Op.getOperand(2));
3410  }
3411  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3412  ? ISD::FMINNAN : ISD::FMAXNAN;
3413  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3414  Op.getOperand(1), Op.getOperand(2));
3415  }
3416  case Intrinsic::arm_neon_vtbl1:
3417  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3418  Op.getOperand(1), Op.getOperand(2));
3419  case Intrinsic::arm_neon_vtbl2:
3420  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3421  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3422  }
3423 }
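// Editorial note (not part of the upstream source): these custom cases only
// remap intrinsics onto generic or ARM-specific DAG nodes; instruction
// selection then matches those nodes as usual. For example, assuming a call
// like
//   %r = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
// the lowering rebuilds it as an ARMISD::VMULLs node with the same operands,
// which later selects to a VMULL.S16.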
3424 
3425 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3426  const ARMSubtarget *Subtarget) {
3427  SDLoc dl(Op);
3428  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3429  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3430  if (SSID == SyncScope::SingleThread)
3431  return Op;
3432 
3433  if (!Subtarget->hasDataBarrier()) {
3434  // Some ARMv6 cpus can support data barriers with an mcr instruction.
3435  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3436  // here.
3437  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3438  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3439  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3440  DAG.getConstant(0, dl, MVT::i32));
3441  }
3442 
3443  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3444  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3445  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3446  if (Subtarget->isMClass()) {
3447  // Only a full system barrier exists in the M-class architectures.
3448  Domain = ARM_MB::SY;
3449  } else if (Subtarget->preferISHSTBarriers() &&
3450  Ord == AtomicOrdering::Release) {
3451  // Swift happens to implement ISHST barriers in a way that's compatible with
3452  // Release semantics but weaker than ISH so we'd be fools not to use
3453  // it. Beware: other processors probably don't!
3454  Domain = ARM_MB::ISHST;
3455  }
3456 
3457  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3458  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3459  DAG.getConstant(Domain, dl, MVT::i32));
3460 }
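// Editorial sketch (not part of the upstream source): assuming a plain
// 'fence seq_cst' on an A-class core with data barriers, the node built above
// selects to 'dmb ish'; on M-class cores only the full-system domain exists,
// so 'dmb sy' is used, and a release-only fence on Swift may be weakened to
// 'dmb ishst' as described in the comment above.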
3461 
3462 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3463  const ARMSubtarget *Subtarget) {
3464  // ARM pre-v5TE and Thumb1 do not have preload instructions.
3465  if (!(Subtarget->isThumb2() ||
3466  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3467  // Just preserve the chain.
3468  return Op.getOperand(0);
3469 
3470  SDLoc dl(Op);
3471  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3472  if (!isRead &&
3473  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3474  // ARMv7 with MP extension has PLDW.
3475  return Op.getOperand(0);
3476 
3477  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3478  if (Subtarget->isThumb()) {
3479  // Invert the bits.
3480  isRead = ~isRead & 1;
3481  isData = ~isData & 1;
3482  }
3483 
3484  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3485  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3486  DAG.getConstant(isData, dl, MVT::i32));
3487 }
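// Editorial sketch (not part of the upstream source): assuming a call such as
//   call void @llvm.prefetch(i8* %p, i32 1, i32 3, i32 1)
// (a write prefetch of data), the PRELOAD node carries isRead = 0 and
// isData = 1 and selects to PLDW on ARMv7 with the MP extension; a read
// prefetch selects PLD, and an instruction prefetch (last operand 0) selects
// PLI. In Thumb mode the two bits are inverted to match the Thumb2 patterns.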
3488 
3489 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3490  MachineFunction &MF = DAG.getMachineFunction();
3491  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3492 
3493  // vastart just stores the address of the VarArgsFrameIndex slot into the
3494  // memory location argument.
3495  SDLoc dl(Op);
3496  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3497  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3498  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3499  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3500  MachinePointerInfo(SV));
3501 }
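// Editorial note (not part of the upstream source): on ARM the va_list is, in
// effect, a single pointer, so a hypothetical 'call void @llvm.va_start(%ap)'
// lowers to the one store above, placing the address of the VarArgsFrameIndex
// slot into %ap; va_arg then simply advances that pointer through the saved
// registers and the stack-passed arguments.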
3502 
3503 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3504  CCValAssign &NextVA,
3505  SDValue &Root,
3506  SelectionDAG &DAG,
3507  const SDLoc &dl) const {
3508  MachineFunction &MF = DAG.getMachineFunction();
3509  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3510 
3511  const TargetRegisterClass *RC;
3512  if (AFI->isThumb1OnlyFunction())
3513  RC = &ARM::tGPRRegClass;
3514  else
3515  RC = &ARM::GPRRegClass;
3516 
3517  // Transform the arguments stored in physical registers into virtual ones.
3518  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3519  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3520 
3521  SDValue ArgValue2;
3522  if (NextVA.isMemLoc()) {
3523  MachineFrameInfo &MFI = MF.getFrameInfo();
3524  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3525 
3526  // Create load node to retrieve arguments from the stack.
3527  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3528  ArgValue2 = DAG.getLoad(
3529  MVT::i32, dl, Root, FIN,
3530  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3531  } else {
3532  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3533  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3534  }
3535  if (!Subtarget->isLittle())
3536  std::swap (ArgValue, ArgValue2);
3537  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3538 }
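// Editorial sketch (not part of the upstream source): with the soft-float
// calling convention, a hypothetical f64 argument whose halves arrive in
// r0/r1 is rebuilt by the helper above roughly as
//   %lo  = CopyFromReg r0
//   %hi  = CopyFromReg r1        @ or a stack load if the pair was split
//   %f64 = ARMISD::VMOVDRR %lo, %hi
// with the two halves swapped first on big-endian subtargets.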
3539 
3540 // The remaining GPRs hold either the beginning of variable-argument
3541 // data, or the beginning of an aggregate passed by value (usually
3542 // byval). Either way, we allocate stack slots adjacent to the data
3543 // provided by our caller, and store the unallocated registers there.
3544 // If this is a variadic function, the va_list pointer will begin with
3545 // these values; otherwise, this reassembles a (byval) structure that
3546 // was split between registers and memory.
3547 // Return: the frame index that the registers were stored into.
3548 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3549  const SDLoc &dl, SDValue &Chain,
3550  const Value *OrigArg,
3551  unsigned InRegsParamRecordIdx,
3552  int ArgOffset, unsigned ArgSize) const {
3553  // Currently, two use-cases are possible:
3554  // Case #1. Non-var-args function, and we meet the first byval parameter.
3555  // Set up the first unallocated register as the first byval register;
3556  // eat all remaining registers
3557  // (these two actions are performed by the HandleByVal method).
3558  // Then, here, we initialize the stack frame with
3559  // "store-reg" instructions.
3560  // Case #2. Var-args function that doesn't contain byval parameters.
3561  // The same: eat all remaining unallocated registers and
3562  // initialize the stack frame.
3563 
3564  MachineFunction &MF = DAG.getMachineFunction();
3565  MachineFrameInfo &MFI = MF.getFrameInfo();
3566  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3567  unsigned RBegin, REnd;
3568  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3569  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3570  } else {
3571  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3572  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3573  REnd = ARM::R4;
3574  }
3575 
3576  if (REnd != RBegin)
3577  ArgOffset = -4 * (ARM::R4 - RBegin);
3578 
3579  auto PtrVT = getPointerTy(DAG.getDataLayout());
3580  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3581  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3582 
3583  SmallVector<SDValue, 4> MemOps;
3584  const TargetRegisterClass *RC =
3585  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3586 
3587  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3588  unsigned VReg = MF.addLiveIn(Reg, RC);
3589  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3590  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3591  MachinePointerInfo(OrigArg, 4 * i));
3592  MemOps.push_back(Store);
3593  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3594  }
3595 
3596  if (!MemOps.empty())
3597  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3598  return FrameIndex;
3599 }
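// Editorial sketch (not part of the upstream source): assume a hypothetical
// 16-byte byval struct whose first 8 bytes were assigned to r2/r3 and whose
// last 8 bytes were passed on the stack. StoreByValRegs creates a fixed frame
// object immediately below the stack-passed part (ArgOffset becomes -8),
// stores r2 and r3 into it, and returns that frame index, so the struct is
// contiguous in memory again for the function body.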
3600 
3601 // Set up the stack frame that the va_list pointer will start from.
3602 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3603  const SDLoc &dl, SDValue &Chain,
3604  unsigned ArgOffset,
3605  unsigned TotalArgRegsSaveSize,
3606  bool ForceMutable) const {
3607  MachineFunction &MF = DAG.getMachineFunction();
3608  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3609 
3610  // Try to store any remaining integer argument regs
3611  // to their spots on the stack so that they may be loaded by dereferencing
3612  // the result of va_next.
3613  // If there are no regs to be stored, just point the address past the last
3614  // argument passed via the stack.
3615  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3616  CCInfo.getInRegsParamsCount(),
3617  CCInfo.getNextStackOffset(), 4);
3618  AFI->setVarArgsFrameIndex(FrameIndex);
3619 }
3620 
3621 SDValue ARMTargetLowering::LowerFormalArguments(
3622  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3623  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3624  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3625  MachineFunction &MF = DAG.getMachineFunction();
3626  MachineFrameInfo &MFI = MF.getFrameInfo();
3627 
3628  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3629 
3630  // Assign locations to all of the incoming arguments.
3631  SmallVector<CCValAssign, 16> ArgLocs;
3632  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3633  *DAG.getContext());
3634  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3635 
3636  SmallVector<SDValue, 16> ArgValues;
3637  SDValue ArgValue;
3638  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3639  unsigned CurArgIdx = 0;
3640 
3641  // Initially ArgRegsSaveSize is zero.
3642  // Then we increase this value each time we meet a byval parameter.
3643  // We also increase this value in the case of a varargs function.
3644  AFI->setArgRegsSaveSize(0);
3645 
3646  // Calculate the amount of stack space that we need to allocate to store
3647  // byval and variadic arguments that are passed in registers.
3648  // We need to know this before we allocate the first byval or variadic
3649  // argument, as they will be allocated a stack slot below the CFA (Canonical
3650  // Frame Address, the stack pointer at entry to the function).
3651  unsigned ArgRegBegin = ARM::R4;
3652  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3653  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3654  break;
3655 
3656  CCValAssign &VA = ArgLocs[i];
3657  unsigned Index = VA.getValNo();
3658  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3659  if (!Flags.isByVal())
3660  continue;
3661 
3662  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3663  unsigned RBegin, REnd;
3664  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3665  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3666 
3667  CCInfo.nextInRegsParam();
3668  }
3669  CCInfo.rewindByValRegsInfo();
3670 
3671  int lastInsIndex = -1;
3672  if (isVarArg && MFI.hasVAStart()) {
3673  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3674  if (RegIdx != array_lengthof(GPRArgRegs))
3675  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3676  }
3677 
3678  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3679  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3680  auto PtrVT = getPointerTy(DAG.getDataLayout());
3681 
3682  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3683  CCValAssign &VA = ArgLocs[i];
3684  if (Ins[VA.getValNo()].isOrigArg()) {
3685  std::advance(CurOrigArg,
3686  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3687  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3688  }
3689  // Arguments stored in registers.
3690  if (VA.isRegLoc()) {
3691  EVT RegVT = VA.getLocVT();
3692 
3693  if (VA.needsCustom()) {
3694  // f64 and vector types are split up into multiple registers or
3695  // combinations of registers and stack slots.
3696  if (VA.getLocVT() == MVT::v2f64) {
3697  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3698  Chain, DAG, dl);
3699  VA = ArgLocs[++i]; // skip ahead to next loc
3700  SDValue ArgValue2;
3701  if (VA.isMemLoc()) {
3702  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3703  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3704  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3705  MachinePointerInfo::getFixedStack(
3706  DAG.getMachineFunction(), FI));
3707  } else {
3708  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3709  Chain, DAG, dl);
3710  }
3711  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3712  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3713  ArgValue, ArgValue1,
3714  DAG.getIntPtrConstant(0, dl));
3715  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3716  ArgValue, ArgValue2,
3717  DAG.getIntPtrConstant(1, dl));
3718  } else
3719  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3720  } else {
3721  const TargetRegisterClass *RC;
3722 
3723 
3724  if (RegVT == MVT::f16)
3725  RC = &ARM::HPRRegClass;
3726  else if (RegVT == MVT::f32)
3727  RC = &ARM::SPRRegClass;
3728  else if (RegVT == MVT::f64)
3729  RC = &ARM::DPRRegClass;
3730  else if (RegVT == MVT::v2f64)
3731  RC = &ARM::QPRRegClass;
3732  else if (RegVT == MVT::i32)
3733  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3734  : &ARM::GPRRegClass;
3735  else
3736  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3737 
3738  // Transform the arguments in physical registers into virtual ones.
3739  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3740  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3741  }
3742 
3743  // If this is an 8 or 16-bit value, it is really passed promoted
3744  // to 32 bits. Insert an assert[sz]ext to capture this, then
3745  // truncate to the right size.
3746  switch (VA.getLocInfo()) {
3747  default: llvm_unreachable("Unknown loc info!");
3748  case CCValAssign::Full: break;
3749  case CCValAssign::BCvt:
3750  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3751  break;
3752  case CCValAssign::SExt:
3753  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3754  DAG.getValueType(VA.getValVT()));
3755  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3756  break;
3757  case CCValAssign::ZExt:
3758  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3759  DAG.getValueType(VA.getValVT()));
3760  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3761  break;
3762  }
3763 
3764  InVals.push_back(ArgValue);
3765  } else { // VA.isRegLoc()
3766  // sanity check
3767  assert(VA.isMemLoc());
3768  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3769 
3770  int index = VA.getValNo();
3771 
3772  // Some Ins[] entries become multiple ArgLoc[] entries.
3773  // Process them only once.
3774  if (index != lastInsIndex)
3775  {
3776  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3777  // FIXME: For now, all byval parameter objects are marked mutable.
3778  // This can be changed with more analysis.
3779  // In case of tail call optimization, mark all arguments mutable, since
3780  // they could be overwritten by the lowering of arguments in case of
3781  // a tail call.
3782  if (Flags.isByVal()) {
3783  assert(Ins[index].isOrigArg() &&
3784  "Byval arguments cannot be implicit");
3785  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3786 
3787  int FrameIndex = StoreByValRegs(
3788  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3789  VA.getLocMemOffset(), Flags.getByValSize());
3790  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3791  CCInfo.nextInRegsParam();
3792  } else {
3793  unsigned FIOffset = VA.getLocMemOffset();
3794  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3795  FIOffset, true);
3796 
3797  // Create load nodes to retrieve arguments from the stack.
3798  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3799  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3800  MachinePointerInfo::getFixedStack(
3801  DAG.getMachineFunction(), FI)));
3802  }
3803  lastInsIndex = index;
3804  }
3805  }
3806  }
3807 
3808  // varargs
3809  if (isVarArg && MFI.hasVAStart())
3810  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3811  CCInfo.getNextStackOffset(),
3812  TotalArgRegsSaveSize);
3813 
3814  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3815 
3816  return Chain;
3817 }
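// Editorial sketch (not part of the upstream source): an illustration of the
// promotion handling in LowerFormalArguments above. Assuming a hypothetical
// 'zeroext i8 %c' argument arriving in r0, the incoming value is rebuilt as
//   %v  = CopyFromReg r0 : i32
//   %v1 = AssertZext %v, i8
//   %c  = truncate %v1 to i8
// which lets later DAG combines remove redundant zero-extensions of %c.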
3818 
3819 /// isFloatingPointZero - Return true if this is +0.0.
3820 static bool isFloatingPointZero(SDValue Op) {
3821  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3822  return CFP->getValueAPF().isPosZero();
3823  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3824  // Maybe this has already been legalized into the constant pool?
3825  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3826  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3827  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3828  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3829  return CFP->getValueAPF().isPosZero();
3830  }
3831  } else if (Op->getOpcode() == ISD::BITCAST &&
3832  Op->getValueType(0) == MVT::f64) {
3833  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3834  // created by LowerConstantFP().
3835  SDValue BitcastOp = Op->getOperand(0);
3836  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3837  isNullConstant(BitcastOp->getOperand(0)))
3838  return true;
3839  }
3840  return false;
3841 }
3842 
3843 /// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
3844 /// the given operands.
3845 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3846  SDValue &ARMcc, SelectionDAG &DAG,
3847  const SDLoc &dl) const {
3848  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3849  unsigned C = RHSC->getZExtValue();
3850  if (!isLegalICmpImmediate(C)) {
3851  // Constant does not fit, try adjusting it by one?
3852  switch (CC) {
3853  default: break;
3854  case ISD::SETLT:
3855  case ISD::SETGE:
3856  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3857  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3858  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3859  }
3860  break;
3861  case ISD::SETULT:
3862  case ISD::SETUGE:
3863  if (C != 0 && isLegalICmpImmediate(C-1)) {
3864  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3865  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3866  }
3867  break;
3868  case ISD::SETLE:
3869  case ISD::SETGT:
3870  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3871  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3872  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3873  }
3874  break;
3875  case ISD::SETULE:
3876  case ISD::SETUGT:
3877  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3878  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3879  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3880  }
3881  break;
3882  }
3883  }
3884  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3885  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3886  // In ARM and Thumb-2, the compare instructions can shift their second
3887  // operand.
3888  CC = ISD::getSetCCSwappedOperands(CC);
3889  std::swap(LHS, RHS);
3890  }
3891 
3892  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3893  ARMISD::NodeType CompareType;
3894  switch (CondCode) {
3895  default:
3896  CompareType = ARMISD::CMP;
3897  break;
3898  case ARMCC::EQ:
3899  case ARMCC::NE:
3900  // Uses only Z Flag
3901  CompareType = ARMISD::CMPZ;
3902  break;
3903  }
3904  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3905  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3906 }
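// Editorial example (not part of the upstream source): the constant
// adjustment above uses the identity x < C  <=>  x <= C-1 (and the analogous
// unsigned and greater-than forms). For a hypothetical compare
//   %x SETLT 0x1000001
// the constant is not a legal compare immediate, but 0x1000000 is, so the
// condition is rewritten to %x SETLE 0x1000000 and a single CMP suffices.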
3907 
3908 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3909 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3910  SelectionDAG &DAG, const SDLoc &dl,
3911  bool InvalidOnQNaN) const {
3912  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3913  SDValue Cmp;
3914  SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3915  if (!isFloatingPointZero(RHS))
3916  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3917  else
3918  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3919  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3920 }
3921 
3922 /// duplicateCmp - Glue values can have only one use, so this function
3923 /// duplicates a comparison node.
3924 SDValue
3925 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3926  unsigned Opc = Cmp.getOpcode();
3927  SDLoc DL(Cmp);
3928  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3929  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3930 
3931  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3932  Cmp = Cmp.getOperand(0);
3933  Opc = Cmp.getOpcode();
3934  if (Opc == ARMISD::CMPFP)
3935  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3936  Cmp.getOperand(1), Cmp.getOperand(2));
3937  else {
3938  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3939  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3940  Cmp.getOperand(1));
3941  }
3942  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3943 }
3944 
3945 // This function returns three things: the arithmetic computation itself
3946 // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
3947 // comparison and the condition code define the case in which the arithmetic
3948 // computation *does not* overflow.
3949 std::pair<SDValue, SDValue>
3950 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3951  SDValue &ARMcc) const {
3952  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3953 
3954  SDValue Value, OverflowCmp;
3955  SDValue LHS = Op.getOperand(0);
3956  SDValue RHS = Op.getOperand(1);
3957  SDLoc dl(Op);
3958 
3959  // FIXME: We are currently always generating CMPs because we don't support
3960  // generating CMN through the backend. This is not as good as the natural
3961  // CMP case because it causes a register dependency and cannot be folded
3962  // later.
3963 
3964  switch (Op.getOpcode()) {
3965  default:
3966  llvm_unreachable("Unknown overflow instruction!");
3967  case ISD::SADDO:
3968  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3969  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3970  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3971  break;
3972  case ISD::UADDO:
3973  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3974  // We use ADDC here to correspond to its use in LowerUnsignedALUO.
3975  // We do not use it in the USUBO case as Value may not be used.
3976  Value = DAG.getNode(ARMISD::ADDC, dl,
3977  DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
3978  .getValue(0);
3979  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3980  break;
3981  case ISD::SSUBO:
3982  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3983  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3984  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3985  break;
3986  case ISD::USUBO:
3987  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3988  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3989  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3990  break;
3991  case ISD::UMULO:
3992  // We generate a UMUL_LOHI and then check if the high word is 0.
3993  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
3994  Value = DAG.getNode(ISD::UMUL_LOHI, dl,
3995  DAG.getVTList(Op.getValueType(), Op.getValueType()),
3996  LHS, RHS);
3997  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
3998  DAG.getConstant(0, dl, MVT::i32));
3999  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4000  break;
4001  case ISD::SMULO:
4002  // We generate a SMUL_LOHI and then check if all the bits of the high word
4003  // are the same as the sign bit of the low word.
4004  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4005  Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4006  DAG.getVTList(Op.getValueType(), Op.getValueType()),
4007  LHS, RHS);
4008  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4009  DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4010  Value.getValue(0),
4011  DAG.getConstant(31, dl, MVT::i32)));
4012  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4013  break;
4014  } // switch (...)
4015 
4016  return std::make_pair(Value, OverflowCmp);
4017 }
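// Editorial sketch (not part of the upstream source): for a hypothetical
//   {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
// the pair returned above is roughly
//   Value       = ADD %a, %b
//   OverflowCmp = ARMISD::CMP Value, %a
//   ARMcc       = VC
// Since (a + b) - a overflows exactly when the original addition does, the VC
// ("no signed overflow") condition holds when the result is valid.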
4018 
4019 SDValue
4020 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4021  // Let legalize expand this if it isn't a legal type yet.
4022  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4023  return SDValue();
4024 
4025  SDValue Value, OverflowCmp;
4026  SDValue ARMcc;
4027  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4028  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4029  SDLoc dl(Op);
4030  // We use 0 and 1 as false and true values.
4031  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4032  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4033  EVT VT = Op.getValueType();
4034 
4035  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4036  ARMcc, CCR, OverflowCmp);
4037 
4038  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4039  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4040 }
4041 
4042 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4043  SelectionDAG &DAG) {
4044  SDLoc DL(BoolCarry);
4045  EVT CarryVT = BoolCarry.getValueType();
4046 
4047  // This converts the boolean value carry into the carry flag by doing
4048  // ARMISD::SUBC Carry, 1
4049  return DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(CarryVT, MVT::i32),
4050  BoolCarry, DAG.getConstant(1, DL, CarryVT));
4051 }
4052 
4053 static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4054  SelectionDAG &DAG) {
4055  SDLoc DL(Flags);
4056 
4057  // Now convert the carry flag into a boolean carry. We do this
4058  // using ARMISD::ADDE 0, 0, Carry
4059  return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4060  DAG.getConstant(0, DL, MVT::i32),
4061  DAG.getConstant(0, DL, MVT::i32), Flags);
4062 }
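// Editorial note (not part of the upstream source): a worked example of the
// two helpers above. A boolean carry of 1 becomes the CPSR carry flag via
// SUBC(1, 1): the subtraction needs no borrow, so C is set; SUBC(0, 1)
// borrows and clears C. In the other direction, ADDE(0, 0, C) evaluates to
// 0 + 0 + C, recovering the boolean value from the flag.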
4063 
4064 SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4065  SelectionDAG &DAG) const {
4066  // Let legalize expand this if it isn't a legal type yet.
4067  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4068  return SDValue();
4069 
4070  SDValue LHS = Op.getOperand(0);
4071  SDValue RHS = Op.getOperand(1);
4072  SDLoc dl(Op);
4073 
4074