LLVM  8.0.0svn
ARMISelLowering.cpp
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that ARM uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARMISelLowering.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMMachineFunctionInfo.h"
21 #include "ARMPerfectShuffle.h"
22 #include "ARMRegisterInfo.h"
23 #include "ARMSelectionDAGInfo.h"
24 #include "ARMSubtarget.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
65 #include "llvm/IR/Attributes.h"
66 #include "llvm/IR/CallingConv.h"
67 #include "llvm/IR/Constant.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DataLayout.h"
70 #include "llvm/IR/DebugLoc.h"
71 #include "llvm/IR/DerivedTypes.h"
72 #include "llvm/IR/Function.h"
73 #include "llvm/IR/GlobalAlias.h"
74 #include "llvm/IR/GlobalValue.h"
75 #include "llvm/IR/GlobalVariable.h"
76 #include "llvm/IR/IRBuilder.h"
77 #include "llvm/IR/InlineAsm.h"
78 #include "llvm/IR/Instruction.h"
79 #include "llvm/IR/Instructions.h"
80 #include "llvm/IR/IntrinsicInst.h"
81 #include "llvm/IR/Intrinsics.h"
82 #include "llvm/IR/Module.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/IR/Value.h"
86 #include "llvm/MC/MCInstrDesc.h"
88 #include "llvm/MC/MCRegisterInfo.h"
89 #include "llvm/MC/MCSchedule.h"
92 #include "llvm/Support/Casting.h"
93 #include "llvm/Support/CodeGen.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 
117 #define DEBUG_TYPE "arm-isel"
118 
119 STATISTIC(NumTailCalls, "Number of tail calls");
120 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
121 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
122 STATISTIC(NumConstpoolPromoted,
123  "Number of constants with their storage promoted into constant pools");
124 
125 static cl::opt<bool>
126 ARMInterworking("arm-interworking", cl::Hidden,
127  cl::desc("Enable / disable ARM interworking (for debugging only)"),
128  cl::init(true));
129 
131  "arm-promote-constant", cl::Hidden,
132  cl::desc("Enable / disable promotion of unnamed_addr constants into "
133  "constant pools"),
134  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
136  "arm-promote-constant-max-size", cl::Hidden,
137  cl::desc("Maximum size of constant to promote into a constant pool"),
138  cl::init(64));
140  "arm-promote-constant-max-total", cl::Hidden,
141  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
142  cl::init(128));
143 
144 // The APCS parameter registers.
145 static const MCPhysReg GPRArgRegs[] = {
146  ARM::R0, ARM::R1, ARM::R2, ARM::R3
147 };
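// r0-r3 hold the first four integer (or pointer) arguments under both APCS
// and AAPCS; any further arguments are passed on the stack.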
148 
149 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
150  MVT PromotedBitwiseVT) {
151  if (VT != PromotedLdStVT) {
153  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
154 
156  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
157  }
158 
159  MVT ElemTy = VT.getVectorElementType();
160  if (ElemTy != MVT::f64)
164  if (ElemTy == MVT::i32) {
169  } else {
174  }
183  if (VT.isInteger()) {
187  }
188 
189  // Promote all bit-wise operations.
190  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
197  }
198 
199  // Neon does not support vector divide/remainder operations.
206 
207  if (!VT.isFloatingPoint() &&
208  VT != MVT::v2i64 && VT != MVT::v1i64)
209  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210  setOperationAction(Opcode, VT, Legal);
211 }
212 
213 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
214  addRegisterClass(VT, &ARM::DPRRegClass);
215  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
216 }
217 
218 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
219  addRegisterClass(VT, &ARM::DPairRegClass);
220  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
221 }
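// D registers are the 64-bit NEON registers and Q registers are 128-bit pairs
// of consecutive D registers, which is why DR types promote their bitwise
// operations to v2i32 and QR types to v4i32.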
222 
223 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
224  const ARMSubtarget &STI)
225  : TargetLowering(TM), Subtarget(&STI) {
226  RegInfo = Subtarget->getRegisterInfo();
227  Itins = Subtarget->getInstrItineraryData();
228 
231 
232  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
233  !Subtarget->isTargetWatchOS()) {
234  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
235  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
236  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
237  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
238  : CallingConv::ARM_AAPCS);
239  }
240 
241  if (Subtarget->isTargetMachO()) {
242  // Uses VFP for Thumb libfuncs if available.
243  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
244  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
245  static const struct {
246  const RTLIB::Libcall Op;
247  const char * const Name;
248  const ISD::CondCode Cond;
249  } LibraryCalls[] = {
250  // Single-precision floating-point arithmetic.
251  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
252  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
253  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
254  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
255 
256  // Double-precision floating-point arithmetic.
257  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
258  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
259  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
260  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
261 
262  // Single-precision comparisons.
263  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
264  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
265  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
266  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
267  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
268  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
269  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
270  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
271 
272  // Double-precision comparisons.
273  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
274  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
275  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
276  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
277  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
278  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
279  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
280  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
281 
282  // Floating-point to integer conversions.
283  // i64 conversions are done via library routines even when generating VFP
284  // instructions, so use the same ones.
285  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
286  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
287  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
288  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
289 
290  // Conversions between floating types.
291  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
292  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
293 
294  // Integer to floating-point conversions.
295  // i64 conversions are done via library routines even when generating VFP
296  // instructions, so use the same ones.
297  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
298  // e.g., __floatunsidf vs. __floatunssidfvfp.
299  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
300  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
301  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
302  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
303  };
304 
305  for (const auto &LC : LibraryCalls) {
306  setLibcallName(LC.Op, LC.Name);
307  if (LC.Cond != ISD::SETCC_INVALID)
308  setCmpLibcallCC(LC.Op, LC.Cond);
309  }
310  }
311  }
312 
313  // These libcalls are not available in 32-bit.
314  setLibcallName(RTLIB::SHL_I128, nullptr);
315  setLibcallName(RTLIB::SRL_I128, nullptr);
316  setLibcallName(RTLIB::SRA_I128, nullptr);
317 
318  // RTLIB
319  if (Subtarget->isAAPCS_ABI() &&
320  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
321  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
322  static const struct {
323  const RTLIB::Libcall Op;
324  const char * const Name;
325  const CallingConv::ID CC;
326  const ISD::CondCode Cond;
327  } LibraryCalls[] = {
328  // Double-precision floating-point arithmetic helper functions
329  // RTABI chapter 4.1.2, Table 2
330  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
331  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
332  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
333  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
334 
335  // Double-precision floating-point comparison helper functions
336  // RTABI chapter 4.1.2, Table 3
337  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
338  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
339  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
340  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
341  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
342  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
343  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
344  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
345 
346  // Single-precision floating-point arithmetic helper functions
347  // RTABI chapter 4.1.2, Table 4
348  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
349  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
350  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
351  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
352 
353  // Single-precision floating-point comparison helper functions
354  // RTABI chapter 4.1.2, Table 5
355  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
356  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
357  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
358  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
359  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
360  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
361  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
362  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
363 
364  // Floating-point to integer conversions.
365  // RTABI chapter 4.1.2, Table 6
366  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
367  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
368  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
369  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
370  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
371  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
372  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
373  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
374 
375  // Conversions between floating types.
376  // RTABI chapter 4.1.2, Table 7
377  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 
381  // Integer to floating-point conversions.
382  // RTABI chapter 4.1.2, Table 8
383  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
384  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
390  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
391 
392  // Long long helper functions
393  // RTABI chapter 4.2, Table 9
394  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 
399  // Integer division functions
400  // RTABI chapter 4.3.1
401  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
402  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
408  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
409  };
410 
411  for (const auto &LC : LibraryCalls) {
412  setLibcallName(LC.Op, LC.Name);
413  setLibcallCallingConv(LC.Op, LC.CC);
414  if (LC.Cond != ISD::SETCC_INVALID)
415  setCmpLibcallCC(LC.Op, LC.Cond);
416  }
417 
418  // EABI dependent RTLIB
419  if (TM.Options.EABIVersion == EABI::EABI4 ||
420  TM.Options.EABIVersion == EABI::EABI5) {
421  static const struct {
422  const RTLIB::Libcall Op;
423  const char *const Name;
424  const CallingConv::ID CC;
425  const ISD::CondCode Cond;
426  } MemOpsLibraryCalls[] = {
427  // Memory operations
428  // RTABI chapter 4.3.4
429  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
430  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
431  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
432  };
433 
434  for (const auto &LC : MemOpsLibraryCalls) {
435  setLibcallName(LC.Op, LC.Name);
436  setLibcallCallingConv(LC.Op, LC.CC);
437  if (LC.Cond != ISD::SETCC_INVALID)
438  setCmpLibcallCC(LC.Op, LC.Cond);
439  }
440  }
441  }
442 
443  if (Subtarget->isTargetWindows()) {
444  static const struct {
445  const RTLIB::Libcall Op;
446  const char * const Name;
447  const CallingConv::ID CC;
448  } LibraryCalls[] = {
449  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
450  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
451  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
452  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
453  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
454  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
455  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
456  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
457  };
458 
459  for (const auto &LC : LibraryCalls) {
460  setLibcallName(LC.Op, LC.Name);
461  setLibcallCallingConv(LC.Op, LC.CC);
462  }
463  }
464 
465  // Use divmod compiler-rt calls for iOS 5.0 and later.
466  if (Subtarget->isTargetMachO() &&
467  !(Subtarget->isTargetIOS() &&
468  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
469  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
470  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
471  }
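 // For example, a function that computes both a / b and a % b can then be
 // lowered to a single __divmodsi4 call that yields the quotient and the
 // remainder together instead of two separate division libcalls.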
472 
473  // The half <-> float conversion functions are always soft-float on
474  // non-watchos platforms, but are needed for some targets which use a
475  // hard-float calling convention by default.
476  if (!Subtarget->isTargetWatchABI()) {
477  if (Subtarget->isAAPCS_ABI()) {
478  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
479  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
480  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
481  } else {
482  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
483  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
484  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
485  }
486  }
487 
488  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
489  // a __gnu_ prefix (which is the default).
490  if (Subtarget->isTargetAEABI()) {
491  static const struct {
492  const RTLIB::Libcall Op;
493  const char * const Name;
494  const CallingConv::ID CC;
495  } LibraryCalls[] = {
496  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
497  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
498  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
499  };
500 
501  for (const auto &LC : LibraryCalls) {
502  setLibcallName(LC.Op, LC.Name);
503  setLibcallCallingConv(LC.Op, LC.CC);
504  }
505  }
506 
507  if (Subtarget->isThumb1Only())
508  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
509  else
510  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
511 
512  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
513  !Subtarget->isThumb1Only()) {
514  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
515  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
516  }
517 
518  if (Subtarget->hasFullFP16()) {
519  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
523 
526  }
527 
528  for (MVT VT : MVT::vector_valuetypes()) {
529  for (MVT InnerVT : MVT::vector_valuetypes()) {
530  setTruncStoreAction(VT, InnerVT, Expand);
531  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
532  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
533  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
534  }
535 
540 
542  }
543 
546 
549 
550  if (Subtarget->hasNEON()) {
551  addDRTypeForNEON(MVT::v2f32);
552  addDRTypeForNEON(MVT::v8i8);
553  addDRTypeForNEON(MVT::v4i16);
554  addDRTypeForNEON(MVT::v2i32);
555  addDRTypeForNEON(MVT::v1i64);
556 
557  addQRTypeForNEON(MVT::v4f32);
558  addQRTypeForNEON(MVT::v2f64);
559  addQRTypeForNEON(MVT::v16i8);
560  addQRTypeForNEON(MVT::v8i16);
561  addQRTypeForNEON(MVT::v4i32);
562  addQRTypeForNEON(MVT::v2i64);
563 
564  if (Subtarget->hasFullFP16()) {
565  addQRTypeForNEON(MVT::v8f16);
566  addDRTypeForNEON(MVT::v4f16);
567  }
568 
569  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
570  // neither Neon nor VFP support any arithmetic operations on it.
571  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
572  // supported for v4f32.
576  // FIXME: Code duplication: FDIV and FREM are expanded always, see
577  // ARMTargetLowering::addTypeForNEON method for details.
580  // FIXME: Create unittest.
581  // In other words, find a way when "copysign" appears in DAG with vector
582  // operands.
584  // FIXME: Code duplication: SETCC has custom operation action, see
585  // ARMTargetLowering::addTypeForNEON method for details.
587  // FIXME: Create unittest for FNEG and for FABS.
599  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
606 
621 
622  // Mark v2f32 intrinsics.
637 
638  // Neon does not support some operations on v1i64 and v2i64 types.
640  // Custom handling for some quad-vector types to detect VMULL.
644  // Custom handling for some vector types to avoid expensive expansions
649  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
650  // a destination type that is wider than the source, nor does
651  // it have a FP_TO_[SU]INT instruction with a narrower destination than
652  // source.
661 
664 
665  // NEON does not have single instruction CTPOP for vectors with element
666  // types wider than 8-bits. However, custom lowering can leverage the
667  // v8i8/v16i8 vcnt instruction.
674 
677 
678  // NEON does not have single instruction CTTZ for vectors.
683 
688 
693 
698 
699  // NEON only has FMA instructions as of VFP4.
700  if (!Subtarget->hasVFP4()) {
703  }
704 
722 
723  // It is legal to extload from v4i8 to v4i16 or v4i32.
725  MVT::v2i32}) {
726  for (MVT VT : MVT::integer_vector_valuetypes()) {
730  }
731  }
732  }
733 
734  if (Subtarget->isFPOnlySP()) {
735  // When targeting a floating-point unit with only single-precision
736  // operations, f64 is legal for the few double-precision instructions which
737  // are present. However, no double-precision operations other than moves,
738  // loads and stores are provided by the hardware.
771  }
772 
774 
775  // ARM does not have floating-point extending loads.
776  for (MVT VT : MVT::fp_valuetypes()) {
779  }
780 
781  // ... or truncating stores
785 
786  // ARM does not have i1 sign extending load.
787  for (MVT VT : MVT::integer_valuetypes())
789 
790  // ARM supports all 4 flavors of integer indexed load / store.
791  if (!Subtarget->isThumb1Only()) {
792  for (unsigned im = (unsigned)ISD::PRE_INC;
802  }
803  } else {
804  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
807  }
808 
813 
816 
817  // i64 operation support.
820  if (Subtarget->isThumb1Only()) {
823  }
824  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
825  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
827 
834 
835  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
836  if (Subtarget->isThumb1Only()) {
840  }
841 
842  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
844 
845  // ARM does not have ROTL.
847  for (MVT VT : MVT::vector_valuetypes()) {
850  }
853  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
855 
856  // @llvm.readcyclecounter requires the Performance Monitors extension.
857  // Default to the 0 expansion on unsupported platforms.
858  // FIXME: Technically there are older ARM CPUs that have
859  // implementation-specific ways of obtaining this information.
860  if (Subtarget->hasPerfMon())
862 
863  // Only ARMv6 has BSWAP.
864  if (!Subtarget->hasV6Ops())
866 
867  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
868  : Subtarget->hasDivideInARMMode();
869  if (!hasDivide) {
870  // These are expanded into libcalls if the cpu doesn't have HW divider.
873  }
874 
875  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
878 
881  }
882 
885 
886  // Register based DivRem for AEABI (RTABI 4.2)
887  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
888  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
889  Subtarget->isTargetWindows()) {
892  HasStandaloneRem = false;
893 
894  if (Subtarget->isTargetWindows()) {
895  const struct {
896  const RTLIB::Libcall Op;
897  const char * const Name;
898  const CallingConv::ID CC;
899  } LibraryCalls[] = {
900  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
901  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
902  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
903  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
904 
905  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
906  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
907  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
908  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
909  };
910 
911  for (const auto &LC : LibraryCalls) {
912  setLibcallName(LC.Op, LC.Name);
913  setLibcallCallingConv(LC.Op, LC.CC);
914  }
915  } else {
916  const struct {
917  const RTLIB::Libcall Op;
918  const char * const Name;
919  const CallingConv::ID CC;
920  } LibraryCalls[] = {
921  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
922  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
923  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
924  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
925 
926  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
927  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
928  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
929  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
930  };
931 
932  for (const auto &LC : LibraryCalls) {
933  setLibcallName(LC.Op, LC.Name);
934  setLibcallCallingConv(LC.Op, LC.CC);
935  }
936  }
937 
942  } else {
945  }
946 
947  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
948  for (auto &VT : {MVT::f32, MVT::f64})
950 
955 
957 
958  // Use the default implementation.
965 
966  if (Subtarget->isTargetWindows())
968  else
970 
971  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
972  // the default expansion.
973  InsertFencesForAtomic = false;
974  if (Subtarget->hasAnyDataBarrier() &&
975  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
976  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
977  // to ldrex/strex loops already.
979  if (!Subtarget->isThumb() || !Subtarget->isMClass())
981 
982  // On v8, we have particularly efficient implementations of atomic fences
983  // if they can be combined with nearby atomic loads and stores.
984  if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
985  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
986  InsertFencesForAtomic = true;
987  }
988  } else {
989  // If there's anything we can use as a barrier, go through custom lowering
990  // for ATOMIC_FENCE.
991  // If target has DMB in thumb, Fences can be inserted.
992  if (Subtarget->hasDataBarrier())
993  InsertFencesForAtomic = true;
994 
996  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
997 
998  // Set them all for expansion, which will force libcalls.
1011  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1012  // Unordered/Monotonic case.
1013  if (!InsertFencesForAtomic) {
1016  }
1017  }
1018 
1020 
1021  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1022  if (!Subtarget->hasV6Ops()) {
1025  }
1027 
1028  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1029  !Subtarget->isThumb1Only()) {
1030  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1031  // iff target supports vfp2.
1034  }
1035 
1036  // We want to custom lower some of our intrinsics.
1041  if (Subtarget->useSjLjEH())
1042  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1043 
1053  if (Subtarget->hasFullFP16()) {
1057  }
1058 
1060 
1063  if (Subtarget->hasFullFP16())
1068 
1069  // We don't support sin/cos/fmod/copysign/pow
1078  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1079  !Subtarget->isThumb1Only()) {
1082  }
1085 
1086  if (!Subtarget->hasVFP4()) {
1089  }
1090 
1091  // Various VFP goodness
1092  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1093  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1094  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1097  }
1098 
1099  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1100  if (!Subtarget->hasFP16()) {
1103  }
1104  }
1105 
1106  // Use __sincos_stret if available.
1107  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1108  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1111  }
1112 
1113  // FP-ARMv8 implements a lot of rounding-like FP operations.
1114  if (Subtarget->hasFPARMv8()) {
1127 
1128  if (!Subtarget->isFPOnlySP()) {
1137  }
1138  }
1139 
1140  if (Subtarget->hasNEON()) {
1141  // vmin and vmax aren't available in a scalar form, so we use
1142  // a NEON instruction with an undef lane instead.
1151 
1152  if (Subtarget->hasFullFP16()) {
1157 
1162  }
1163  }
1164 
1165  // We have target-specific dag combine patterns for the following nodes:
1166  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1173 
1174  if (Subtarget->hasV6Ops())
1176 
1178 
1179  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1180  !Subtarget->hasVFP2())
1182  else
1184 
1185  //// temporary - rewrite interface to use type
1186  MaxStoresPerMemset = 8;
1188  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1190  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1192 
1193  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1194  // are at least 4 bytes aligned.
1196 
1197  // Prefer likely predicted branches to selects on out-of-order cores.
1198  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1199 
1200  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1201 }
1202 
1203 bool ARMTargetLowering::useSoftFloat() const {
1204  return Subtarget->useSoftFloat();
1205 }
1206 
1207 // FIXME: It might make sense to define the representative register class as the
1208 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1209 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1210 // SPR's representative would be DPR_VFP2. This should work well if register
1211 // pressure tracking were modified such that a register use would increment the
1212 // pressure of the register class's representative and all of its super
1213 // classes' representatives transitively. We have not implemented this because
1214 // of the difficulty prior to coalescing of modeling operand register classes
1215 // due to the common occurrence of cross class copies and subregister insertions
1216 // and extractions.
1217 std::pair<const TargetRegisterClass *, uint8_t>
1218 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1219  MVT VT) const {
1220  const TargetRegisterClass *RRC = nullptr;
1221  uint8_t Cost = 1;
1222  switch (VT.SimpleTy) {
1223  default:
1225  // Use DPR as representative register class for all floating point
1226  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1227  // the cost is 1 for both f32 and f64.
1228  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1229  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1230  RRC = &ARM::DPRRegClass;
1231  // When NEON is used for SP, only half of the register file is available
1232  // because operations that define both SP and DP results will be constrained
1233  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1234  // coalescing by double-counting the SP regs. See the FIXME above.
1235  if (Subtarget->useNEONForSinglePrecisionFP())
1236  Cost = 2;
1237  break;
1238  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1239  case MVT::v4f32: case MVT::v2f64:
1240  RRC = &ARM::DPRRegClass;
1241  Cost = 2;
1242  break;
1243  case MVT::v4i64:
1244  RRC = &ARM::DPRRegClass;
1245  Cost = 4;
1246  break;
1247  case MVT::v8i64:
1248  RRC = &ARM::DPRRegClass;
1249  Cost = 8;
1250  break;
1251  }
1252  return std::make_pair(RRC, Cost);
1253 }
1254 
1255 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1256  switch ((ARMISD::NodeType)Opcode) {
1257  case ARMISD::FIRST_NUMBER: break;
1258  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1259  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1260  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1261  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1262  case ARMISD::CALL: return "ARMISD::CALL";
1263  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1264  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1265  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1266  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1267  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1268  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1269  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1270  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1271  case ARMISD::CMP: return "ARMISD::CMP";
1272  case ARMISD::CMN: return "ARMISD::CMN";
1273  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1274  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1275  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1276  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1277  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1278 
1279  case ARMISD::CMOV: return "ARMISD::CMOV";
1280 
1281  case ARMISD::SSAT: return "ARMISD::SSAT";
1282  case ARMISD::USAT: return "ARMISD::USAT";
1283 
1284  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1285  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1286  case ARMISD::RRX: return "ARMISD::RRX";
1287 
1288  case ARMISD::ADDC: return "ARMISD::ADDC";
1289  case ARMISD::ADDE: return "ARMISD::ADDE";
1290  case ARMISD::SUBC: return "ARMISD::SUBC";
1291  case ARMISD::SUBE: return "ARMISD::SUBE";
1292 
1293  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1294  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1295  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1296  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1297  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1298 
1299  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1300  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1301  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1302 
1303  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1304 
1305  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1306 
1307  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1308 
1309  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1310 
1311  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1312 
1313  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1314  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1315 
1316  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1317  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1318  case ARMISD::VCGE: return "ARMISD::VCGE";
1319  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1320  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1321  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1322  case ARMISD::VCGT: return "ARMISD::VCGT";
1323  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1324  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1325  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1326  case ARMISD::VTST: return "ARMISD::VTST";
1327 
1328  case ARMISD::VSHL: return "ARMISD::VSHL";
1329  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1330  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1331  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1332  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1333  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1334  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1335  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1336  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1337  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1338  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1339  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1340  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1341  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1342  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1343  case ARMISD::VSLI: return "ARMISD::VSLI";
1344  case ARMISD::VSRI: return "ARMISD::VSRI";
1345  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1346  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1347  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1348  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1349  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1350  case ARMISD::VDUP: return "ARMISD::VDUP";
1351  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1352  case ARMISD::VEXT: return "ARMISD::VEXT";
1353  case ARMISD::VREV64: return "ARMISD::VREV64";
1354  case ARMISD::VREV32: return "ARMISD::VREV32";
1355  case ARMISD::VREV16: return "ARMISD::VREV16";
1356  case ARMISD::VZIP: return "ARMISD::VZIP";
1357  case ARMISD::VUZP: return "ARMISD::VUZP";
1358  case ARMISD::VTRN: return "ARMISD::VTRN";
1359  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1360  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1361  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1362  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1363  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1364  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1365  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1366  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1367  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1368  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1369  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1370  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1371  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1372  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1373  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1374  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1375  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1376  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1377  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1378  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1379  case ARMISD::BFI: return "ARMISD::BFI";
1380  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1381  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1382  case ARMISD::VBSL: return "ARMISD::VBSL";
1383  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1384  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1385  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1386  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1387  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1388  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1389  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1390  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1391  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1392  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1393  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1394  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1395  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1396  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1397  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1398  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1399  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1400  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1401  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1402  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1403  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1404  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1405  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1406  }
1407  return nullptr;
1408 }
1409 
1411  EVT VT) const {
1412  if (!VT.isVector())
1413  return getPointerTy(DL);
1415 }
1416 
1417 /// getRegClassFor - Return the register class that should be used for the
1418 /// specified value type.
1419 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1420  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1421  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1422  // load / store 4 to 8 consecutive D registers.
1423  if (Subtarget->hasNEON()) {
1424  if (VT == MVT::v4i64)
1425  return &ARM::QQPRRegClass;
1426  if (VT == MVT::v8i64)
1427  return &ARM::QQQQPRRegClass;
1428  }
1429  return TargetLowering::getRegClassFor(VT);
1430 }
1431 
1432 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1433 // source/dest is aligned and the copy size is large enough. We therefore want
1434 // to align such objects passed to memory intrinsics.
1435 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1436  unsigned &PrefAlign) const {
1437  if (!isa<MemIntrinsic>(CI))
1438  return false;
1439  MinSize = 8;
1440  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1441  // cycle faster than 4-byte aligned LDM.
1442  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1443  return true;
1444 }
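// This hook is queried from IR-level passes (e.g. CodeGenPrepare) so that
// allocas passed to memory intrinsics of at least MinSize bytes can have
// their alignment raised to PrefAlign before instruction selection.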
1445 
1446 // Create a fast isel object.
1447 FastISel *
1448 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1449  const TargetLibraryInfo *libInfo) const {
1450  return ARM::createFastISel(funcInfo, libInfo);
1451 }
1452 
1453 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1454  unsigned NumVals = N->getNumValues();
1455  if (!NumVals)
1456  return Sched::RegPressure;
1457 
1458  for (unsigned i = 0; i != NumVals; ++i) {
1459  EVT VT = N->getValueType(i);
1460  if (VT == MVT::Glue || VT == MVT::Other)
1461  continue;
1462  if (VT.isFloatingPoint() || VT.isVector())
1463  return Sched::ILP;
1464  }
1465 
1466  if (!N->isMachineOpcode())
1467  return Sched::RegPressure;
1468 
1469  // Loads are scheduled for latency even if the instruction itinerary
1470  // is not available.
1471  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1472  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1473 
1474  if (MCID.getNumDefs() == 0)
1475  return Sched::RegPressure;
1476  if (!Itins->isEmpty() &&
1477  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1478  return Sched::ILP;
1479 
1480  return Sched::RegPressure;
1481 }
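// Sched::ILP exposes instruction-level parallelism, which helps hide the
// latency of floating-point, vector and load results; Sched::RegPressure
// keeps register pressure low when latency is unlikely to matter.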
1482 
1483 //===----------------------------------------------------------------------===//
1484 // Lowering Code
1485 //===----------------------------------------------------------------------===//
1486 
1487 static bool isSRL16(const SDValue &Op) {
1488  if (Op.getOpcode() != ISD::SRL)
1489  return false;
1490  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1491  return Const->getZExtValue() == 16;
1492  return false;
1493 }
1494 
1495 static bool isSRA16(const SDValue &Op) {
1496  if (Op.getOpcode() != ISD::SRA)
1497  return false;
1498  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1499  return Const->getZExtValue() == 16;
1500  return false;
1501 }
1502 
1503 static bool isSHL16(const SDValue &Op) {
1504  if (Op.getOpcode() != ISD::SHL)
1505  return false;
1506  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1507  return Const->getZExtValue() == 16;
1508  return false;
1509 }
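// The three helpers above recognize shifts by a constant 16; they are used
// when matching top/bottom-halfword multiply patterns (e.g. SMULWB/SMLABB and
// friends) in the DAG combines later in this file.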
1510 
1511 // Check for a signed 16-bit value. We special case SRA because it makes it
1512 // simpler when also looking for SRAs that aren't sign extending a
1513 // smaller value. Without the check, we'd need to take extra care with
1514 // checking order for some operations.
1515 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1516  if (isSRA16(Op))
1517  return isSHL16(Op.getOperand(0));
1518  return DAG.ComputeNumSignBits(Op) == 17;
1519 }
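// For example, (sra (shl x, 16), 16) is a sign-extended 16-bit value, and an
// i32 with 17 known sign bits already fits in the signed 16-bit range.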
1520 
1521 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1522 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1523  switch (CC) {
1524  default: llvm_unreachable("Unknown condition code!");
1525  case ISD::SETNE: return ARMCC::NE;
1526  case ISD::SETEQ: return ARMCC::EQ;
1527  case ISD::SETGT: return ARMCC::GT;
1528  case ISD::SETGE: return ARMCC::GE;
1529  case ISD::SETLT: return ARMCC::LT;
1530  case ISD::SETLE: return ARMCC::LE;
1531  case ISD::SETUGT: return ARMCC::HI;
1532  case ISD::SETUGE: return ARMCC::HS;
1533  case ISD::SETULT: return ARMCC::LO;
1534  case ISD::SETULE: return ARMCC::LS;
1535  }
1536 }
1537 
1538 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1539 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1540  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1541  CondCode2 = ARMCC::AL;
1542  InvalidOnQNaN = true;
1543  switch (CC) {
1544  default: llvm_unreachable("Unknown FP condition!");
1545  case ISD::SETEQ:
1546  case ISD::SETOEQ:
1547  CondCode = ARMCC::EQ;
1548  InvalidOnQNaN = false;
1549  break;
1550  case ISD::SETGT:
1551  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1552  case ISD::SETGE:
1553  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1554  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1555  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1556  case ISD::SETONE:
1557  CondCode = ARMCC::MI;
1558  CondCode2 = ARMCC::GT;
1559  InvalidOnQNaN = false;
1560  break;
1561  case ISD::SETO: CondCode = ARMCC::VC; break;
1562  case ISD::SETUO: CondCode = ARMCC::VS; break;
1563  case ISD::SETUEQ:
1564  CondCode = ARMCC::EQ;
1565  CondCode2 = ARMCC::VS;
1566  InvalidOnQNaN = false;
1567  break;
1568  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1569  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1570  case ISD::SETLT:
1571  case ISD::SETULT: CondCode = ARMCC::LT; break;
1572  case ISD::SETLE:
1573  case ISD::SETULE: CondCode = ARMCC::LE; break;
1574  case ISD::SETNE:
1575  case ISD::SETUNE:
1576  CondCode = ARMCC::NE;
1577  InvalidOnQNaN = false;
1578  break;
1579  }
1580 }
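// Some ordered/unordered comparisons need a second condition code because no
// single ARM condition covers them; e.g. SETONE (ordered and not equal) is
// tested as MI followed by GT after FMSTAT copies the VFP flags into CPSR.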
1581 
1582 //===----------------------------------------------------------------------===//
1583 // Calling Convention Implementation
1584 //===----------------------------------------------------------------------===//
1585 
1586 #include "ARMGenCallingConv.inc"
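// The TableGen-generated include above provides the CC_ARM_* and RetCC_ARM_*
// CCAssignFn implementations (from ARMCallingConv.td) that are selected by
// CCAssignFnForNode below.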
1587 
1588 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1589 /// account presence of floating point hardware and calling convention
1590 /// limitations, such as support for variadic functions.
1591 CallingConv::ID
1592 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1593  bool isVarArg) const {
1594  switch (CC) {
1595  default:
1596  report_fatal_error("Unsupported calling convention");
1598  case CallingConv::ARM_APCS:
1599  case CallingConv::GHC:
1600  return CC;
1604  case CallingConv::Swift:
1606  case CallingConv::C:
1607  if (!Subtarget->isAAPCS_ABI())
1608  return CallingConv::ARM_APCS;
1609  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1611  !isVarArg)
1613  else
1614  return CallingConv::ARM_AAPCS;
1615  case CallingConv::Fast:
1617  if (!Subtarget->isAAPCS_ABI()) {
1618  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1619  return CallingConv::Fast;
1620  return CallingConv::ARM_APCS;
1621  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1623  else
1624  return CallingConv::ARM_AAPCS;
1625  }
1626 }
1627 
1628 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1629  bool isVarArg) const {
1630  return CCAssignFnForNode(CC, false, isVarArg);
1631 }
1632 
1633 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1634  bool isVarArg) const {
1635  return CCAssignFnForNode(CC, true, isVarArg);
1636 }
1637 
1638 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1639 /// CallingConvention.
1640 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1641  bool Return,
1642  bool isVarArg) const {
1643  switch (getEffectiveCallingConv(CC, isVarArg)) {
1644  default:
1645  report_fatal_error("Unsupported calling convention");
1646  case CallingConv::ARM_APCS:
1647  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1649  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1651  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1652  case CallingConv::Fast:
1653  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1654  case CallingConv::GHC:
1655  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1657  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1658  }
1659 }
1660 
1661 /// LowerCallResult - Lower the result values of a call into the
1662 /// appropriate copies out of appropriate physical registers.
1663 SDValue ARMTargetLowering::LowerCallResult(
1664  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1665  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1666  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1667  SDValue ThisVal) const {
1668  // Assign locations to each value returned by this call.
1670  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1671  *DAG.getContext());
1672  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1673 
1674  // Copy all of the result registers out of their specified physreg.
1675  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1676  CCValAssign VA = RVLocs[i];
1677 
1678  // Pass 'this' value directly from the argument to return value, to avoid
1679  // reg unit interference
1680  if (i == 0 && isThisReturn) {
1681  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1682  "unexpected return calling convention register assignment");
1683  InVals.push_back(ThisVal);
1684  continue;
1685  }
1686 
1687  SDValue Val;
1688  if (VA.needsCustom()) {
1689  // Handle f64 or half of a v2f64.
1690  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1691  InFlag);
1692  Chain = Lo.getValue(1);
1693  InFlag = Lo.getValue(2);
1694  VA = RVLocs[++i]; // skip ahead to next loc
1695  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1696  InFlag);
1697  Chain = Hi.getValue(1);
1698  InFlag = Hi.getValue(2);
1699  if (!Subtarget->isLittle())
1700  std::swap (Lo, Hi);
1701  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1702 
1703  if (VA.getLocVT() == MVT::v2f64) {
1704  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1705  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1706  DAG.getConstant(0, dl, MVT::i32));
1707 
1708  VA = RVLocs[++i]; // skip ahead to next loc
1709  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1710  Chain = Lo.getValue(1);
1711  InFlag = Lo.getValue(2);
1712  VA = RVLocs[++i]; // skip ahead to next loc
1713  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1714  Chain = Hi.getValue(1);
1715  InFlag = Hi.getValue(2);
1716  if (!Subtarget->isLittle())
1717  std::swap (Lo, Hi);
1718  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1719  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1720  DAG.getConstant(1, dl, MVT::i32));
1721  }
1722  } else {
1723  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1724  InFlag);
1725  Chain = Val.getValue(1);
1726  InFlag = Val.getValue(2);
1727  }
1728 
1729  switch (VA.getLocInfo()) {
1730  default: llvm_unreachable("Unknown loc info!");
1731  case CCValAssign::Full: break;
1732  case CCValAssign::BCvt:
1733  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1734  break;
1735  }
1736 
1737  InVals.push_back(Val);
1738  }
1739 
1740  return Chain;
1741 }
1742 
1743 /// LowerMemOpCallTo - Store the argument to the stack.
1744 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1745  SDValue Arg, const SDLoc &dl,
1746  SelectionDAG &DAG,
1747  const CCValAssign &VA,
1748  ISD::ArgFlagsTy Flags) const {
1749  unsigned LocMemOffset = VA.getLocMemOffset();
1750  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1751  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1752  StackPtr, PtrOff);
1753  return DAG.getStore(
1754  Chain, dl, Arg, PtrOff,
1755  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1756 }
1757 
1758 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1759  SDValue Chain, SDValue &Arg,
1760  RegsToPassVector &RegsToPass,
1761  CCValAssign &VA, CCValAssign &NextVA,
1762  SDValue &StackPtr,
1763  SmallVectorImpl<SDValue> &MemOpChains,
1764  ISD::ArgFlagsTy Flags) const {
1765  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1766  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1767  unsigned id = Subtarget->isLittle() ? 0 : 1;
1768  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1769 
1770  if (NextVA.isRegLoc())
1771  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1772  else {
1773  assert(NextVA.isMemLoc());
1774  if (!StackPtr.getNode())
1775  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1776  getPointerTy(DAG.getDataLayout()));
1777 
1778  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1779  dl, DAG, NextVA,
1780  Flags));
1781  }
1782 }
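// ARMISD::VMOVRRD corresponds to "vmov rd, rn, dm": it splits an f64 held in
// a D register into two i32 halves so the value can be passed in a GPR pair
// (or partly on the stack) as the calling convention requires.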
1783 
1784 /// LowerCall - Lowering a call into a callseq_start <-
1785 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1786 /// nodes.
1787 SDValue
1788 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1789  SmallVectorImpl<SDValue> &InVals) const {
1790  SelectionDAG &DAG = CLI.DAG;
1791  SDLoc &dl = CLI.DL;
1793  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1795  SDValue Chain = CLI.Chain;
1796  SDValue Callee = CLI.Callee;
1797  bool &isTailCall = CLI.IsTailCall;
1798  CallingConv::ID CallConv = CLI.CallConv;
1799  bool doesNotRet = CLI.DoesNotReturn;
1800  bool isVarArg = CLI.IsVarArg;
1801 
1802  MachineFunction &MF = DAG.getMachineFunction();
1803  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1804  bool isThisReturn = false;
1805  bool isSibCall = false;
1806  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
1807 
1808  // Disable tail calls if they're not supported.
1809  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1810  isTailCall = false;
1811 
1812  if (isTailCall) {
1813  // Check if it's really possible to do a tail call.
1814  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1815  isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
1816  Outs, OutVals, Ins, DAG);
1817  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1818  report_fatal_error("failed to perform tail call elimination on a call "
1819  "site marked musttail");
1820  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1821  // detected sibcalls.
1822  if (isTailCall) {
1823  ++NumTailCalls;
1824  isSibCall = true;
1825  }
1826  }
1827 
1828  // Analyze operands of the call, assigning locations to each operand.
1830  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1831  *DAG.getContext());
1832  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1833 
1834  // Get a count of how many bytes are to be pushed on the stack.
1835  unsigned NumBytes = CCInfo.getNextStackOffset();
1836 
1837  // For tail calls, memory operands are available in our caller's stack.
1838  if (isSibCall)
1839  NumBytes = 0;
1840 
1841  // Adjust the stack pointer for the new arguments...
1842  // These operations are automatically eliminated by the prolog/epilog pass
1843  if (!isSibCall)
1844  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1845 
1846  SDValue StackPtr =
1847  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1848 
1849  RegsToPassVector RegsToPass;
1850  SmallVector<SDValue, 8> MemOpChains;
1851 
1852  // Walk the register/memloc assignments, inserting copies/loads. In the case
1853  // of tail call optimization, arguments are handled later.
1854  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1855  i != e;
1856  ++i, ++realArgIdx) {
1857  CCValAssign &VA = ArgLocs[i];
1858  SDValue Arg = OutVals[realArgIdx];
1859  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1860  bool isByVal = Flags.isByVal();
1861 
1862  // Promote the value if needed.
1863  switch (VA.getLocInfo()) {
1864  default: llvm_unreachable("Unknown loc info!");
1865  case CCValAssign::Full: break;
1866  case CCValAssign::SExt:
1867  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1868  break;
1869  case CCValAssign::ZExt:
1870  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1871  break;
1872  case CCValAssign::AExt:
1873  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1874  break;
1875  case CCValAssign::BCvt:
1876  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1877  break;
1878  }
1879 
1880  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1881  if (VA.needsCustom()) {
1882  if (VA.getLocVT() == MVT::v2f64) {
1883  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1884  DAG.getConstant(0, dl, MVT::i32));
1885  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1886  DAG.getConstant(1, dl, MVT::i32));
1887 
1888  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1889  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1890 
1891  VA = ArgLocs[++i]; // skip ahead to next loc
1892  if (VA.isRegLoc()) {
1893  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1894  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1895  } else {
1896  assert(VA.isMemLoc());
1897 
1898  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1899  dl, DAG, VA, Flags));
1900  }
1901  } else {
1902  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1903  StackPtr, MemOpChains, Flags);
1904  }
1905  } else if (VA.isRegLoc()) {
1906  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1907  Outs[0].VT == MVT::i32) {
1908  assert(VA.getLocVT() == MVT::i32 &&
1909  "unexpected calling convention register assignment");
1910  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1911  "unexpected use of 'returned'");
1912  isThisReturn = true;
1913  }
1914  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1915  } else if (isByVal) {
1916  assert(VA.isMemLoc());
1917  unsigned offset = 0;
1918 
1919  // True if this byval aggregate will be split between registers
1920  // and memory.
1921  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1922  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1923 
1924  if (CurByValIdx < ByValArgsCount) {
1925 
1926  unsigned RegBegin, RegEnd;
1927  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1928 
1929  EVT PtrVT =
1930      DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1931  unsigned int i, j;
1932  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1933  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1934  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1935  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1936                             MachinePointerInfo(),
1937                             DAG.InferPtrAlignment(AddArg));
1938  MemOpChains.push_back(Load.getValue(1));
1939  RegsToPass.push_back(std::make_pair(j, Load));
1940  }
1941 
1942  // If the parameter size exceeds the register area, the "offset" value
1943  // helps us calculate the stack slot for the remaining part properly.
1944  offset = RegEnd - RegBegin;
1945 
1946  CCInfo.nextInRegsParam();
1947  }
1948 
1949  if (Flags.getByValSize() > 4*offset) {
1950  auto PtrVT = getPointerTy(DAG.getDataLayout());
1951  unsigned LocMemOffset = VA.getLocMemOffset();
1952  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1953  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1954  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1955  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1956  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1957  MVT::i32);
1958  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1959  MVT::i32);
1960 
1961  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1962  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1963  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1964  Ops));
1965  }
1966  } else if (!isSibCall) {
1967  assert(VA.isMemLoc());
1968 
1969  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1970  dl, DAG, VA, Flags));
1971  }
1972  }
1973 
1974  if (!MemOpChains.empty())
1975  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1976 
1977  // Build a sequence of copy-to-reg nodes chained together with token chain
1978  // and flag operands which copy the outgoing args into the appropriate regs.
1979  SDValue InFlag;
1980  // Tail call byval lowering might overwrite argument registers so in case of
1981  // tail call optimization the copies to registers are lowered later.
1982  if (!isTailCall)
1983  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1984  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1985  RegsToPass[i].second, InFlag);
1986  InFlag = Chain.getValue(1);
1987  }
1988 
1989  // For tail calls lower the arguments to the 'real' stack slot.
1990  if (isTailCall) {
1991  // Force all the incoming stack arguments to be loaded from the stack
1992  // before any new outgoing arguments are stored to the stack, because the
1993  // outgoing stack slots may alias the incoming argument stack slots, and
1994  // the alias isn't otherwise explicit. This is slightly more conservative
1995  // than necessary, because it means that each store effectively depends
1996  // on every argument instead of just those arguments it would clobber.
1997 
1998  // Do not flag preceding copytoreg stuff together with the following stuff.
1999  InFlag = SDValue();
2000  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2001  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2002  RegsToPass[i].second, InFlag);
2003  InFlag = Chain.getValue(1);
2004  }
2005  InFlag = SDValue();
2006  }
2007 
2008  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2009  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2010  // node so that legalize doesn't hack it.
2011  bool isDirect = false;
2012 
2013  const TargetMachine &TM = getTargetMachine();
2014  const Module *Mod = MF.getFunction().getParent();
2015  const GlobalValue *GV = nullptr;
2016  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2017  GV = G->getGlobal();
2018  bool isStub =
2019  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2020 
2021  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2022  bool isLocalARMFunc = false;
2023  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2024  auto PtrVt = getPointerTy(DAG.getDataLayout());
2025 
2026  if (Subtarget->genLongCalls()) {
2027  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2028  "long-calls codegen is not position independent!");
2029  // Handle a global address or an external symbol. If it's not one of
2030  // those, the target's already in a register, so we don't need to do
2031  // anything extra.
2032  if (isa<GlobalAddressSDNode>(Callee)) {
2033  // Create a constant pool entry for the callee address
2034  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2035  ARMConstantPoolValue *CPV =
2036  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2037 
2038  // Get the address of the callee into a register
2039  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2040  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2041  Callee = DAG.getLoad(
2042      PtrVt, dl, DAG.getEntryNode(), CPAddr,
2043      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2044  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2045  const char *Sym = S->getSymbol();
2046 
2047  // Create a constant pool entry for the callee address
2048  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2049  ARMConstantPoolValue *CPV =
2050      ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2051                                    ARMPCLabelIndex, 0);
2052  // Get the address of the callee into a register
2053  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2054  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2055  Callee = DAG.getLoad(
2056      PtrVt, dl, DAG.getEntryNode(), CPAddr,
2057      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2058  }
2059  } else if (isa<GlobalAddressSDNode>(Callee)) {
2060  // If we're optimizing for minimum size and the function is called three or
2061  // more times in this block, we can improve codesize by calling indirectly
2062  // as BLXr has a 16-bit encoding.
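      // For example, the callee's address can be materialized into a register
      // once, and each call then uses the 2-byte "blx rN" encoding instead of
      // a 4-byte "bl callee".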
2063  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2064  auto *BB = CLI.CS.getParent();
2065  bool PreferIndirect =
2066  Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
2067  count_if(GV->users(), [&BB](const User *U) {
2068  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2069  }) > 2;
2070 
2071  if (!PreferIndirect) {
2072  isDirect = true;
2073  bool isDef = GV->isStrongDefinitionForLinker();
2074 
2075  // ARM call to a local ARM function is predicable.
2076  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2077  // tBX takes a register source operand.
2078  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2079  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2080  Callee = DAG.getNode(
2081  ARMISD::WrapperPIC, dl, PtrVt,
2082  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2083  Callee = DAG.getLoad(
2084      PtrVt, dl, DAG.getEntryNode(), Callee,
2085      MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2086      /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2087                               MachineMemOperand::MOInvariant);
2088  } else if (Subtarget->isTargetCOFF()) {
2089  assert(Subtarget->isTargetWindows() &&
2090  "Windows is the only supported COFF target");
2091      unsigned TargetFlags = GV->hasDLLImportStorageClass()
2092                                 ? ARMII::MO_DLLIMPORT
2093                                 : ARMII::MO_NO_FLAG;
2094  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2095  TargetFlags);
2096  if (GV->hasDLLImportStorageClass())
2097  Callee =
2098  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2099                      DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2100                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2101  } else {
2102  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2103  }
2104  }
2105  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2106  isDirect = true;
2107  // tBX takes a register source operand.
2108  const char *Sym = S->getSymbol();
2109  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2110  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2111  ARMConstantPoolValue *CPV =
2112      ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2113                                    ARMPCLabelIndex, 4);
2114  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2115  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2116  Callee = DAG.getLoad(
2117      PtrVt, dl, DAG.getEntryNode(), CPAddr,
2118      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2119  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2120  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2121  } else {
2122  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2123  }
2124  }
2125 
2126  // FIXME: handle tail calls differently.
2127  unsigned CallOpc;
2128  if (Subtarget->isThumb()) {
2129  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2130  CallOpc = ARMISD::CALL_NOLINK;
2131  else
2132  CallOpc = ARMISD::CALL;
2133  } else {
2134  if (!isDirect && !Subtarget->hasV5TOps())
2135  CallOpc = ARMISD::CALL_NOLINK;
2136  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2137  // Emit regular call when code size is the priority
2138  !MF.getFunction().optForMinSize())
2139  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2140  CallOpc = ARMISD::CALL_NOLINK;
2141  else
2142  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2143  }
2144 
2145  std::vector<SDValue> Ops;
2146  Ops.push_back(Chain);
2147  Ops.push_back(Callee);
2148 
2149  // Add argument registers to the end of the list so that they are known live
2150  // into the call.
2151  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2152  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2153  RegsToPass[i].second.getValueType()));
2154 
2155  // Add a register mask operand representing the call-preserved registers.
2156  if (!isTailCall) {
2157  const uint32_t *Mask;
2158  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2159  if (isThisReturn) {
2160  // For 'this' returns, use the R0-preserving mask if applicable
2161  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2162  if (!Mask) {
2163  // Set isThisReturn to false if the calling convention is not one that
2164  // allows 'returned' to be modeled in this way, so LowerCallResult does
2165  // not try to pass 'this' straight through
2166  isThisReturn = false;
2167  Mask = ARI->getCallPreservedMask(MF, CallConv);
2168  }
2169  } else
2170  Mask = ARI->getCallPreservedMask(MF, CallConv);
2171 
2172  assert(Mask && "Missing call preserved mask for calling convention");
2173  Ops.push_back(DAG.getRegisterMask(Mask));
2174  }
2175 
2176  if (InFlag.getNode())
2177  Ops.push_back(InFlag);
2178 
2179  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2180  if (isTailCall) {
2181  MF.getFrameInfo().setHasTailCall();
2182  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2183  }
2184 
2185  // Returns a chain and a flag for retval copy to use.
2186  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2187  InFlag = Chain.getValue(1);
2188 
2189  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2190  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2191  if (!Ins.empty())
2192  InFlag = Chain.getValue(1);
2193 
2194  // Handle result values, copying them out of physregs into vregs that we
2195  // return.
2196  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2197  InVals, isThisReturn,
2198  isThisReturn ? OutVals[0] : SDValue());
2199 }
2200 
2201 /// HandleByVal - Every parameter *after* a byval parameter is passed
2202 /// on the stack. Remember the next parameter register to allocate,
2203 /// and then confiscate the rest of the parameter registers to ensure
2204 /// this.
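/// For example, a 16-byte byval argument with 4-byte alignment arriving when
/// R1 is the next free register (and nothing has been assigned to the stack
/// yet) occupies R1-R3, and Size is reduced to 4, so only the last 4 bytes of
/// the aggregate are passed in memory.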
2205 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2206  unsigned Align) const {
2207  // Byval (as with any stack) slots are always at least 4 byte aligned.
2208  Align = std::max(Align, 4U);
2209 
2210  unsigned Reg = State->AllocateReg(GPRArgRegs);
2211  if (!Reg)
2212  return;
2213 
2214  unsigned AlignInRegs = Align / 4;
2215  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2216  for (unsigned i = 0; i < Waste; ++i)
2217  Reg = State->AllocateReg(GPRArgRegs);
2218 
2219  if (!Reg)
2220  return;
2221 
2222  unsigned Excess = 4 * (ARM::R4 - Reg);
2223 
2224  // Special case when NSAA != SP and the parameter size is greater than the
2225  // size of all remaining GPR regs. In that case we can't split the parameter,
2226  // we must send it to the stack. We also must set NCRN to R4, so we waste all
2227  // remaining registers.
2228  const unsigned NSAAOffset = State->getNextStackOffset();
2229  if (NSAAOffset != 0 && Size > Excess) {
2230  while (State->AllocateReg(GPRArgRegs))
2231  ;
2232  return;
2233  }
2234 
2235  // The first register for the byval parameter is the first register that
2236  // wasn't allocated before this method call, so it would be "reg".
2237  // If the parameter is small enough to be saved in the range [reg, r4), then
2238  // the end (first after last) register would be reg + param-size-in-regs;
2239  // otherwise the parameter is split between registers and stack, and the
2240  // end register would be r4 in that case.
2241  unsigned ByValRegBegin = Reg;
2242  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2243  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2244  // Note, the first register is already allocated at the beginning of this
2245  // function; allocate the remaining registers we need.
2246  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2247  State->AllocateReg(GPRArgRegs);
2248  // A byval parameter that is split between registers and memory needs its
2249  // size truncated here.
2250  // In the case where the entire structure fits in registers, we set the
2251  // size in memory to zero.
2252  Size = std::max<int>(Size - Excess, 0);
2253 }
2254 
2255 /// MatchingStackOffset - Return true if the given stack call argument is
2256 /// already available in the same position (relatively) of the caller's
2257 /// incoming argument stack.
2258 static
2259 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2260                          MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2261                          const TargetInstrInfo *TII) {
2262  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2263  int FI = std::numeric_limits<int>::max();
2264  if (Arg.getOpcode() == ISD::CopyFromReg) {
2265  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2266  if (!TargetRegisterInfo::isVirtualRegister(VR))
2267    return false;
2268  MachineInstr *Def = MRI->getVRegDef(VR);
2269  if (!Def)
2270  return false;
2271  if (!Flags.isByVal()) {
2272  if (!TII->isLoadFromStackSlot(*Def, FI))
2273  return false;
2274  } else {
2275  return false;
2276  }
2277  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2278  if (Flags.isByVal())
2279  // ByVal argument is passed in as a pointer but it's now being
2280  // dereferenced. e.g.
2281  // define @foo(%struct.X* %A) {
2282  // tail call @bar(%struct.X* byval %A)
2283  // }
2284  return false;
2285  SDValue Ptr = Ld->getBasePtr();
2286  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2287  if (!FINode)
2288  return false;
2289  FI = FINode->getIndex();
2290  } else
2291  return false;
2292 
2293  assert(FI != std::numeric_limits<int>::max());
2294  if (!MFI.isFixedObjectIndex(FI))
2295  return false;
2296  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2297 }
2298 
2299 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2300 /// for tail call optimization. Targets which want to do tail call
2301 /// optimization should implement this function.
2302 bool
2303 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2304  CallingConv::ID CalleeCC,
2305  bool isVarArg,
2306  bool isCalleeStructRet,
2307  bool isCallerStructRet,
2308  const SmallVectorImpl<ISD::OutputArg> &Outs,
2309  const SmallVectorImpl<SDValue> &OutVals,
2310  const SmallVectorImpl<ISD::InputArg> &Ins,
2311  SelectionDAG& DAG) const {
2312  MachineFunction &MF = DAG.getMachineFunction();
2313  const Function &CallerF = MF.getFunction();
2314  CallingConv::ID CallerCC = CallerF.getCallingConv();
2315 
2316  assert(Subtarget->supportsTailCall());
2317 
2318  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
2319  // to the call take up r0-r3. The reason is that there are no legal registers
2320  // left to hold the pointer to the function to be called.
2321  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2322  !isa<GlobalAddressSDNode>(Callee.getNode()))
2323  return false;
2324 
2325  // Look for obvious safe cases to perform tail call optimization that do not
2326  // require ABI changes. This is what gcc calls sibcall.
2327 
2328  // Exception-handling functions need a special set of instructions to indicate
2329  // a return to the hardware. Tail-calling another function would probably
2330  // break this.
2331  if (CallerF.hasFnAttribute("interrupt"))
2332  return false;
2333 
2334  // Also avoid sibcall optimization if either caller or callee uses struct
2335  // return semantics.
2336  if (isCalleeStructRet || isCallerStructRet)
2337  return false;
2338 
2339  // Externally-defined functions with weak linkage should not be
2340  // tail-called on ARM when the OS does not support dynamic
2341  // pre-emption of symbols, as the AAELF spec requires normal calls
2342  // to undefined weak functions to be replaced with a NOP or jump to the
2343  // next instruction. The behaviour of branch instructions in this
2344  // situation (as used for tail calls) is implementation-defined, so we
2345  // cannot rely on the linker replacing the tail call with a return.
2346  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2347  const GlobalValue *GV = G->getGlobal();
2348  const Triple &TT = getTargetMachine().getTargetTriple();
2349  if (GV->hasExternalWeakLinkage() &&
2350  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2351  return false;
2352  }
2353 
2354  // Check that the call results are passed in the same way.
2355  LLVMContext &C = *DAG.getContext();
2356  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2357  CCAssignFnForReturn(CalleeCC, isVarArg),
2358  CCAssignFnForReturn(CallerCC, isVarArg)))
2359  return false;
2360  // The callee has to preserve all registers the caller needs to preserve.
2361  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2362  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2363  if (CalleeCC != CallerCC) {
2364  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2365  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2366  return false;
2367  }
2368 
2369  // If Caller's vararg or byval argument has been split between registers and
2370  // stack, do not perform tail call, since part of the argument is in caller's
2371  // local frame.
2372  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2373  if (AFI_Caller->getArgRegsSaveSize())
2374  return false;
2375 
2376  // If the callee takes no arguments then go on to check the results of the
2377  // call.
2378  if (!Outs.empty()) {
2379  // Check if stack adjustment is needed. For now, do not do this if any
2380  // argument is passed on the stack.
2381  SmallVector<CCValAssign, 16> ArgLocs;
2382  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2383  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2384  if (CCInfo.getNextStackOffset()) {
2385  // Check if the arguments are already laid out in the right way as
2386  // the caller's fixed stack objects.
2387  MachineFrameInfo &MFI = MF.getFrameInfo();
2388  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2389  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2390  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2391  i != e;
2392  ++i, ++realArgIdx) {
2393  CCValAssign &VA = ArgLocs[i];
2394  EVT RegVT = VA.getLocVT();
2395  SDValue Arg = OutVals[realArgIdx];
2396  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2397  if (VA.getLocInfo() == CCValAssign::Indirect)
2398  return false;
2399  if (VA.needsCustom()) {
2400  // f64 and vector types are split into multiple registers or
2401  // register/stack-slot combinations. The types will not match
2402  // the registers; give up on memory f64 refs until we figure
2403  // out what to do about this.
2404  if (!VA.isRegLoc())
2405  return false;
2406  if (!ArgLocs[++i].isRegLoc())
2407  return false;
2408  if (RegVT == MVT::v2f64) {
2409  if (!ArgLocs[++i].isRegLoc())
2410  return false;
2411  if (!ArgLocs[++i].isRegLoc())
2412  return false;
2413  }
2414  } else if (!VA.isRegLoc()) {
2415  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2416  MFI, MRI, TII))
2417  return false;
2418  }
2419  }
2420  }
2421 
2422  const MachineRegisterInfo &MRI = MF.getRegInfo();
2423  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2424  return false;
2425  }
2426 
2427  return true;
2428 }
2429 
2430 bool
2431 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2432  MachineFunction &MF, bool isVarArg,
2433  const SmallVectorImpl<ISD::OutputArg> &Outs,
2434  LLVMContext &Context) const {
2435  SmallVector<CCValAssign, 16> RVLocs;
2436  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2437  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2438 }
2439 
2440 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2441                                     const SDLoc &DL, SelectionDAG &DAG) {
2442  const MachineFunction &MF = DAG.getMachineFunction();
2443  const Function &F = MF.getFunction();
2444 
2445  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2446 
2447  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2448  // version of the "preferred return address". These offsets affect the return
2449  // instruction if this is a return from PL1 without hypervisor extensions.
2450  // IRQ/FIQ: +4 "subs pc, lr, #4"
2451  // SWI: 0 "subs pc, lr, #0"
2452  // ABORT: +4 "subs pc, lr, #4"
2453  // UNDEF: +4/+2 "subs pc, lr, #0"
2454  // UNDEF varies depending on where the exception came from ARM or Thumb
2455  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
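  // For example, a handler for IRQ or FIQ therefore returns with
  // "subs pc, lr, #4", so execution resumes at the interrupted instruction.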
2456 
2457  int64_t LROffset;
2458  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2459  IntKind == "ABORT")
2460  LROffset = 4;
2461  else if (IntKind == "SWI" || IntKind == "UNDEF")
2462  LROffset = 0;
2463  else
2464  report_fatal_error("Unsupported interrupt attribute. If present, value "
2465  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2466 
2467  RetOps.insert(RetOps.begin() + 1,
2468  DAG.getConstant(LROffset, DL, MVT::i32, false));
2469 
2470  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2471 }
2472 
2473 SDValue
2474 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2475  bool isVarArg,
2476  const SmallVectorImpl<ISD::OutputArg> &Outs,
2477  const SmallVectorImpl<SDValue> &OutVals,
2478  const SDLoc &dl, SelectionDAG &DAG) const {
2479  // CCValAssign - represent the assignment of the return value to a location.
2480  SmallVector<CCValAssign, 16> RVLocs;
2481
2482  // CCState - Info about the registers and stack slots.
2483  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2484  *DAG.getContext());
2485 
2486  // Analyze outgoing return values.
2487  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2488 
2489  SDValue Flag;
2490  SmallVector<SDValue, 4> RetOps;
2491  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2492  bool isLittleEndian = Subtarget->isLittle();
2493 
2494  MachineFunction &MF = DAG.getMachineFunction();
2495  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2496  AFI->setReturnRegsCount(RVLocs.size());
2497 
2498  // Copy the result values into the output registers.
2499  for (unsigned i = 0, realRVLocIdx = 0;
2500  i != RVLocs.size();
2501  ++i, ++realRVLocIdx) {
2502  CCValAssign &VA = RVLocs[i];
2503  assert(VA.isRegLoc() && "Can only return in registers!");
2504 
2505  SDValue Arg = OutVals[realRVLocIdx];
2506  bool ReturnF16 = false;
2507 
2508  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2509  // Half-precision return values can be returned like this:
2510  //
2511  //   t11: f16 = fadd ...
2512  // t12: i16 = bitcast t11
2513  // t13: i32 = zero_extend t12
2514  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2515  //
2516  // to avoid code generation for bitcasts, we simply set Arg to the node
2517  // that produces the f16 value, t11 in this case.
2518  //
2519  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2520  SDValue ZE = Arg.getOperand(0);
2521  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2522  SDValue BC = ZE.getOperand(0);
2523  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2524  Arg = BC.getOperand(0);
2525  ReturnF16 = true;
2526  }
2527  }
2528  }
2529  }
2530 
2531  switch (VA.getLocInfo()) {
2532  default: llvm_unreachable("Unknown loc info!");
2533  case CCValAssign::Full: break;
2534  case CCValAssign::BCvt:
2535  if (!ReturnF16)
2536  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2537  break;
2538  }
2539 
2540  if (VA.needsCustom()) {
2541  if (VA.getLocVT() == MVT::v2f64) {
2542  // Extract the first half and return it in two registers.
2543  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2544  DAG.getConstant(0, dl, MVT::i32));
2545  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2546  DAG.getVTList(MVT::i32, MVT::i32), Half);
2547 
2548  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2549  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2550  Flag);
2551  Flag = Chain.getValue(1);
2552  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2553  VA = RVLocs[++i]; // skip ahead to next loc
2554  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2555  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2556  Flag);
2557  Flag = Chain.getValue(1);
2558  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2559  VA = RVLocs[++i]; // skip ahead to next loc
2560 
2561  // Extract the 2nd half and fall through to handle it as an f64 value.
2562  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2563  DAG.getConstant(1, dl, MVT::i32));
2564  }
2565  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2566  // available.
2567  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2568  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2569  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2570  fmrrd.getValue(isLittleEndian ? 0 : 1),
2571  Flag);
2572  Flag = Chain.getValue(1);
2573  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2574  VA = RVLocs[++i]; // skip ahead to next loc
2575  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2576  fmrrd.getValue(isLittleEndian ? 1 : 0),
2577  Flag);
2578  } else
2579  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2580 
2581  // Guarantee that all emitted copies are stuck together with glue,
2582  // so that nothing else can be scheduled in between them.
2583  Flag = Chain.getValue(1);
2584  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2585  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2586  }
2587  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2588  const MCPhysReg *I =
2589      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction().getFunction());
2590  if (I) {
2591  for (; *I; ++I) {
2592  if (ARM::GPRRegClass.contains(*I))
2593  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2594  else if (ARM::DPRRegClass.contains(*I))
2595  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2596  else
2597  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2598  }
2599  }
2600 
2601  // Update chain and glue.
2602  RetOps[0] = Chain;
2603  if (Flag.getNode())
2604  RetOps.push_back(Flag);
2605 
2606  // CPUs which aren't M-class use a special sequence to return from
2607  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2608  // though we use "subs pc, lr, #N").
2609  //
2610  // M-class CPUs actually use a normal return sequence with a special
2611  // (hardware-provided) value in LR, so the normal code path works.
2612  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2613  !Subtarget->isMClass()) {
2614  if (Subtarget->isThumb1Only())
2615  report_fatal_error("interrupt attribute is not supported in Thumb1");
2616  return LowerInterruptReturn(RetOps, dl, DAG);
2617  }
2618 
2619  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2620 }
2621 
2622 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2623  if (N->getNumValues() != 1)
2624  return false;
2625  if (!N->hasNUsesOfValue(1, 0))
2626  return false;
2627 
2628  SDValue TCChain = Chain;
2629  SDNode *Copy = *N->use_begin();
2630  if (Copy->getOpcode() == ISD::CopyToReg) {
2631  // If the copy has a glue operand, we conservatively assume it isn't safe to
2632  // perform a tail call.
2633  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2634  return false;
2635  TCChain = Copy->getOperand(0);
2636  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2637  SDNode *VMov = Copy;
2638  // f64 returned in a pair of GPRs.
2639  SmallPtrSet<SDNode*, 2> Copies;
2640  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2641  UI != UE; ++UI) {
2642  if (UI->getOpcode() != ISD::CopyToReg)
2643  return false;
2644  Copies.insert(*UI);
2645  }
2646  if (Copies.size() > 2)
2647  return false;
2648 
2649  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2650  UI != UE; ++UI) {
2651  SDValue UseChain = UI->getOperand(0);
2652  if (Copies.count(UseChain.getNode()))
2653  // Second CopyToReg
2654  Copy = *UI;
2655  else {
2656  // We are at the top of this chain.
2657  // If the copy has a glue operand, we conservatively assume it
2658  // isn't safe to perform a tail call.
2659  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2660  return false;
2661  // First CopyToReg
2662  TCChain = UseChain;
2663  }
2664  }
2665  } else if (Copy->getOpcode() == ISD::BITCAST) {
2666  // f32 returned in a single GPR.
2667  if (!Copy->hasOneUse())
2668  return false;
2669  Copy = *Copy->use_begin();
2670  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2671  return false;
2672  // If the copy has a glue operand, we conservatively assume it isn't safe to
2673  // perform a tail call.
2674  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2675  return false;
2676  TCChain = Copy->getOperand(0);
2677  } else {
2678  return false;
2679  }
2680 
2681  bool HasRet = false;
2682  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2683  UI != UE; ++UI) {
2684  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2685  UI->getOpcode() != ARMISD::INTRET_FLAG)
2686  return false;
2687  HasRet = true;
2688  }
2689 
2690  if (!HasRet)
2691  return false;
2692 
2693  Chain = TCChain;
2694  return true;
2695 }
2696 
2697 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2698  if (!Subtarget->supportsTailCall())
2699  return false;
2700 
2701  auto Attr =
2702  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2703  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2704  return false;
2705 
2706  return true;
2707 }
2708 
2709 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2710 // values first, and pass the lower and high parts through.
2711 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2712  SDLoc DL(Op);
2713  SDValue WriteValue = Op->getOperand(2);
2714 
2715  // This function is only supposed to be called for i64 type argument.
2716  assert(WriteValue.getValueType() == MVT::i64
2717  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2718 
2719  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2720  DAG.getConstant(0, DL, MVT::i32));
2721  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2722  DAG.getConstant(1, DL, MVT::i32));
2723  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2724  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2725 }
2726 
2727 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2728 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2729 // one of the above mentioned nodes. It has to be wrapped because otherwise
2730 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2731 // be used to form addressing mode. These wrapped nodes will be selected
2732 // into MOVi.
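// For example, LowerConstantPool below produces
//   (ARMISD::Wrapper (TargetConstantPool <cp idx>)),
// which instruction selection can match into a PC-relative constant-pool load,
// whereas a bare TargetConstantPool node would be returned unchanged.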
2733 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2734  SelectionDAG &DAG) const {
2735  EVT PtrVT = Op.getValueType();
2736  // FIXME there is no actual debug info here
2737  SDLoc dl(Op);
2738  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2739  SDValue Res;
2740 
2741  // When generating execute-only code Constant Pools must be promoted to the
2742  // global data section. It's a bit ugly that we can't share them across basic
2743  // blocks, but this way we guarantee that execute-only behaves correctly with
2744  // position-independent addressing modes.
2745  if (Subtarget->genExecuteOnly()) {
2746  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2747  auto T = const_cast<Type*>(CP->getType());
2748  auto C = const_cast<Constant*>(CP->getConstVal());
2749  auto M = const_cast<Module*>(DAG.getMachineFunction().
2750  getFunction().getParent());
2751  auto GV = new GlobalVariable(
2752  *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2753  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2754  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2755  Twine(AFI->createPICLabelUId())
2756  );
2757  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2758  dl, PtrVT);
2759  return LowerGlobalAddress(GA, DAG);
2760  }
2761 
2762  if (CP->isMachineConstantPoolEntry())
2763  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2764  CP->getAlignment());
2765  else
2766  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2767  CP->getAlignment());
2768  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2769 }
2770 
2771 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2772   return MachineJumpTableInfo::EK_Inline;
2773 }
2774 
2775 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2776  SelectionDAG &DAG) const {
2777  MachineFunction &MF = DAG.getMachineFunction();
2778  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2779  unsigned ARMPCLabelIndex = 0;
2780  SDLoc DL(Op);
2781  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2782  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2783  SDValue CPAddr;
2784  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2785  if (!IsPositionIndependent) {
2786  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2787  } else {
2788  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2789  ARMPCLabelIndex = AFI->createPICLabelUId();
2790  ARMConstantPoolValue *CPV =
2791  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2792  ARMCP::CPBlockAddress, PCAdj);
2793  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2794  }
2795  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2796  SDValue Result = DAG.getLoad(
2797      PtrVT, DL, DAG.getEntryNode(), CPAddr,
2798      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2799  if (!IsPositionIndependent)
2800  return Result;
2801  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2802  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2803 }
2804 
2805 /// Convert a TLS address reference into the correct sequence of loads
2806 /// and calls to compute the variable's address for Darwin, and return an
2807 /// SDValue containing the final node.
2808 
2809 /// Darwin only has one TLS scheme which must be capable of dealing with the
2810 /// fully general situation, in the worst case. This means:
2811 /// + "extern __thread" declaration.
2812 /// + Defined in a possibly unknown dynamic library.
2813 ///
2814 /// The general system is that each __thread variable has a [3 x i32] descriptor
2815 /// which contains information used by the runtime to calculate the address. The
2816 /// only part of this the compiler needs to know about is the first word, which
2817 /// contains a function pointer that must be called with the address of the
2818 /// entire descriptor in "r0".
2819 ///
2820 /// Since this descriptor may be in a different unit, in general access must
2821 /// proceed along the usual ARM rules. A common sequence to produce is:
2822 ///
2823 /// movw rT1, :lower16:_var$non_lazy_ptr
2824 /// movt rT1, :upper16:_var$non_lazy_ptr
2825 /// ldr r0, [rT1]
2826 /// ldr rT2, [r0]
2827 /// blx rT2
2828 /// [...address now in r0...]
2829 SDValue
2830 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2831  SelectionDAG &DAG) const {
2832  assert(Subtarget->isTargetDarwin() &&
2833  "This function expects a Darwin target");
2834  SDLoc DL(Op);
2835 
2836  // First step is to get the address of the actual global symbol. This is where
2837  // the TLS descriptor lives.
2838  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2839 
2840  // The first entry in the descriptor is a function pointer that we must call
2841  // to obtain the address of the variable.
2842  SDValue Chain = DAG.getEntryNode();
2843  SDValue FuncTLVGet = DAG.getLoad(
2844      MVT::i32, DL, Chain, DescAddr,
2845      MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2846      /* Alignment = */ 4,
2847      MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2848          MachineMemOperand::MOInvariant);
2849  Chain = FuncTLVGet.getValue(1);
2850 
2851  MachineFunction &F = DAG.getMachineFunction();
2852  MachineFrameInfo &MFI = F.getFrameInfo();
2853  MFI.setAdjustsStack(true);
2854 
2855  // TLS calls preserve all registers except those that absolutely must be
2856  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2857  // silly).
2858  auto TRI =
2859  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
2860  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2861  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2862 
2863  // Finally, we can make the call. This is just a degenerate version of a
2864  // normal ARM call node: r0 takes the address of the descriptor, and
2865  // returns the address of the variable in this thread.
2866  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2867  Chain =
2868      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2869                  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2870  DAG.getRegisterMask(Mask), Chain.getValue(1));
2871  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2872 }
2873 
2874 SDValue
2875 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2876  SelectionDAG &DAG) const {
2877  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2878 
2879  SDValue Chain = DAG.getEntryNode();
2880  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2881  SDLoc DL(Op);
2882 
2883  // Load the current TEB (thread environment block)
2884  SDValue Ops[] = {Chain,
2885  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2886  DAG.getConstant(15, DL, MVT::i32),
2887  DAG.getConstant(0, DL, MVT::i32),
2888  DAG.getConstant(13, DL, MVT::i32),
2889  DAG.getConstant(0, DL, MVT::i32),
2890  DAG.getConstant(2, DL, MVT::i32)};
2891  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2892  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2893 
2894  SDValue TEB = CurrentTEB.getValue(0);
2895  Chain = CurrentTEB.getValue(1);
2896 
2897  // Load the ThreadLocalStoragePointer from the TEB
2898  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2899  SDValue TLSArray =
2900  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2901  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2902 
2903  // The pointer to the thread's TLS data area is at the TLS Index scaled by
2904  // 4 bytes, used as an offset into the TLSArray.
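  // Putting the pieces together, the address computed below is roughly
  //   &var = *( *(TEB + 0x2c) + _tls_index * 4 ) + <SECREL offset of var>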
2905 
2906  // Load the TLS index from the C runtime
2907  SDValue TLSIndex =
2908  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2909  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2910  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2911 
2912  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2913  DAG.getConstant(2, DL, MVT::i32));
2914  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2915  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2916  MachinePointerInfo());
2917 
2918  // Get the offset of the start of the .tls section (section base)
2919  const auto *GA = cast<GlobalAddressSDNode>(Op);
2920  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2921  SDValue Offset = DAG.getLoad(
2922  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2923                                    DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2924      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2925 
2926  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2927 }
2928 
2929 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2930 SDValue
2931 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2932  SelectionDAG &DAG) const {
2933  SDLoc dl(GA);
2934  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2935  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2936  MachineFunction &MF = DAG.getMachineFunction();
2937  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2938  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2939  ARMConstantPoolValue *CPV =
2940  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2941  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2942  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2943  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2944  Argument = DAG.getLoad(
2945      PtrVT, dl, DAG.getEntryNode(), Argument,
2946      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2947  SDValue Chain = Argument.getValue(1);
2948 
2949  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2950  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2951 
2952  // call __tls_get_addr.
2953  ArgListTy Args;
2954  ArgListEntry Entry;
2955  Entry.Node = Argument;
2956  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2957  Args.push_back(Entry);
2958 
2959  // FIXME: is there useful debug info available here?
2960  TargetLowering::CallLoweringInfo CLI(DAG);
2961  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2962      CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2963      DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2964 
2965  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2966  return CallResult.first;
2967 }
2968 
2969 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2970 // "local exec" model.
2971 SDValue
2972 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2973  SelectionDAG &DAG,
2974  TLSModel::Model model) const {
2975  const GlobalValue *GV = GA->getGlobal();
2976  SDLoc dl(GA);
2977  SDValue Offset;
2978  SDValue Chain = DAG.getEntryNode();
2979  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2980  // Get the Thread Pointer
2981  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2982
2983  if (model == TLSModel::InitialExec) {
2984  MachineFunction &MF = DAG.getMachineFunction();
2985    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2986    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2987  // Initial exec model.
2988  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2989  ARMConstantPoolValue *CPV =
2990  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2991                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2992                                      true);
2993  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2994  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2995  Offset = DAG.getLoad(
2996      PtrVT, dl, Chain, Offset,
2997      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2998  Chain = Offset.getValue(1);
2999 
3000  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3001  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3002 
3003  Offset = DAG.getLoad(
3004      PtrVT, dl, Chain, Offset,
3005      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3006  } else {
3007  // local exec model
3008  assert(model == TLSModel::LocalExec);
3009  ARMConstantPoolValue *CPV =
3010      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3011    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3012  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3013  Offset = DAG.getLoad(
3014      PtrVT, dl, Chain, Offset,
3015      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3016  }
3017 
3018  // The address of the thread local variable is the add of the thread
3019  // pointer with the offset of the variable.
3020  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3021 }
3022 
3023 SDValue
3024 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3025  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3026  if (DAG.getTarget().useEmulatedTLS())
3027  return LowerToTLSEmulatedModel(GA, DAG);
3028 
3029  if (Subtarget->isTargetDarwin())
3030  return LowerGlobalTLSAddressDarwin(Op, DAG);
3031 
3032  if (Subtarget->isTargetWindows())
3033  return LowerGlobalTLSAddressWindows(Op, DAG);
3034 
3035  // TODO: implement the "local dynamic" model
3036  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3037  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3038
3039  switch (model) {
3040    case TLSModel::GeneralDynamic:
3041    case TLSModel::LocalDynamic:
3042      return LowerToTLSGeneralDynamicModel(GA, DAG);
3043  case TLSModel::InitialExec:
3044  case TLSModel::LocalExec:
3045  return LowerToTLSExecModels(GA, DAG, model);
3046  }
3047  llvm_unreachable("bogus TLS model");
3048 }
3049 
3050 /// Return true if all users of V are within function F, looking through
3051 /// ConstantExprs.
3052 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3053  SmallVector<const User*,4> Worklist;
3054  for (auto *U : V->users())
3055  Worklist.push_back(U);
3056  while (!Worklist.empty()) {
3057  auto *U = Worklist.pop_back_val();
3058  if (isa<ConstantExpr>(U)) {
3059  for (auto *UU : U->users())
3060  Worklist.push_back(UU);
3061  continue;
3062  }
3063 
3064  auto *I = dyn_cast<Instruction>(U);
3065  if (!I || I->getParent()->getParent() != F)
3066  return false;
3067  }
3068  return true;
3069 }
3070 
3071 /// Return true if all users of V are within some (any) function, looking through
3072 /// ConstantExprs. In other words, are there any global constant users?
3073 static bool allUsersAreInFunctions(const Value *V) {
3074  SmallVector<const User*,4> Worklist;
3075  for (auto *U : V->users())
3076  Worklist.push_back(U);
3077  while (!Worklist.empty()) {
3078  auto *U = Worklist.pop_back_val();
3079  if (isa<ConstantExpr>(U)) {
3080  for (auto *UU : U->users())
3081  Worklist.push_back(UU);
3082  continue;
3083  }
3084 
3085  if (!isa<Instruction>(U))
3086  return false;
3087  }
3088  return true;
3089 }
3090 
3091 // Return true if T is an integer, float or an array/vector of either.
3092 static bool isSimpleType(Type *T) {
3093  if (T->isIntegerTy() || T->isFloatingPointTy())
3094  return true;
3095  Type *SubT = nullptr;
3096  if (T->isArrayTy())
3097  SubT = T->getArrayElementType();
3098  else if (T->isVectorTy())
3099  SubT = T->getVectorElementType();
3100  else
3101  return false;
3102  return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3103 }
3104 
3105 static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3106                                      EVT PtrVT, const SDLoc &dl) {
3107  // If we're creating a pool entry for a constant global with unnamed address,
3108  // and the global is small enough, we can emit it inline into the constant pool
3109  // to save ourselves an indirection.
3110  //
3111  // This is a win if the constant is only used in one function (so it doesn't
3112  // need to be duplicated) or duplicating the constant wouldn't increase code
3113  // size (implying the constant is no larger than 4 bytes).
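  // For example, a private unnamed_addr constant i32 can be duplicated into
  // each user's constant pool without increasing code size, avoiding both the
  // separate global and the extra load of its address.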
3114  const Function &F = DAG.getMachineFunction().getFunction();
3115 
3116  // We rely on this decision to inline being idempotent and unrelated to the
3117  // use-site. We know that if we inline a variable at one use site, we'll
3118  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3119  // doesn't know about this optimization, so bail out if it's enabled else
3120  // we could decide to inline here (and thus never emit the GV) but require
3121  // the GV from fast-isel generated code.
3122  if (!EnableConstpoolPromotion ||
3123      DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3124    return SDValue();
3125 
3126  auto *GVar = dyn_cast<GlobalVariable>(GV);
3127  if (!GVar || !GVar->hasInitializer() ||
3128  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3129  !GVar->hasLocalLinkage())
3130  return SDValue();
3131 
3132  // Ensure that we don't try and inline any type that contains pointers. If
3133  // we inline a value that contains relocations, we move the relocations from
3134  // .data to .text which is not ideal.
3135  auto *Init = GVar->getInitializer();
3136  if (!isSimpleType(Init->getType()))
3137  return SDValue();
3138 
3139  // The constant islands pass can only really deal with alignment requests
3140  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3141  // any type wanting greater alignment requirements than 4 bytes. We also
3142  // can only promote constants that are multiples of 4 bytes in size or
3143  // are paddable to a multiple of 4. Currently we only try and pad constants
3144  // that are strings for simplicity.
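  // For example, a 6-byte string initializer gets RequiredPadding = 2 below
  // and is padded with zero bytes to a PaddedSize of 8 before being emitted.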
3145  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3146  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3147  unsigned Align = GVar->getAlignment();
3148  unsigned RequiredPadding = 4 - (Size % 4);
3149  bool PaddingPossible =
3150  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3151  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3152  Size == 0)
3153  return SDValue();
3154 
3155  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3156  MachineFunction &MF = DAG.getMachineFunction();
3157  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3158
3159  // We can't bloat the constant pool too much, else the ConstantIslands pass
3160  // may fail to converge. If we haven't promoted this global yet (it may have
3161  // multiple uses), and promoting it would increase the constant pool size (Sz
3162  // > 4), ensure we have space to do so up to MaxTotal.
3163  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3164  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3165        ConstpoolPromotionMaxTotal)
3166      return SDValue();
3167 
3168  // This is only valid if all users are in a single function OR it has users
3169  // in multiple functions but is no larger than a pointer. We also check if
3170  // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3171  // address taken.
3172  if (!allUsersAreInFunction(GVar, &F) &&
3173  !(Size <= 4 && allUsersAreInFunctions(GVar)))
3174  return SDValue();
3175 
3176  // We're going to inline this global. Pad it out if needed.
3177  if (RequiredPadding != 4) {
3178  StringRef S = CDAInit->getAsString();
3179 
3180    SmallVector<uint8_t,16> V(S.size());
3181    std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3182  while (RequiredPadding--)
3183  V.push_back(0);
3184  Init = ConstantDataArray::get(*DAG.getContext(), V);
3185  }
3186 
3187  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3188  SDValue CPAddr =
3189  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3190  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3191    AFI->markGlobalAsPromotedToConstantPool(GVar);
3192    AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3193                                      PaddedSize - 4);
3194  }
3195  ++NumConstpoolPromoted;
3196  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3197 }
3198 
3199 bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3200   if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3201  GV = GA->getBaseObject();
3202  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3203  isa<Function>(GV);
3204 }
3205 
3206 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3207  SelectionDAG &DAG) const {
3208  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3209  default: llvm_unreachable("unknown object format");
3210  case Triple::COFF:
3211  return LowerGlobalAddressWindows(Op, DAG);
3212  case Triple::ELF:
3213  return LowerGlobalAddressELF(Op, DAG);
3214  case Triple::MachO:
3215  return LowerGlobalAddressDarwin(Op, DAG);
3216  }
3217 }
3218 
3219 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3220  SelectionDAG &DAG) const {
3221  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3222  SDLoc dl(Op);
3223  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3224  const TargetMachine &TM = getTargetMachine();
3225  bool IsRO = isReadOnly(GV);
3226 
3227  // promoteToConstantPool only if not generating XO text section
3228  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3229  if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3230  return V;
3231 
3232  if (isPositionIndependent()) {
3233  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3234  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3235  UseGOT_PREL ? ARMII::MO_GOT : 0);
3236  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3237  if (UseGOT_PREL)
3238  Result =
3239          DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3240                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3241  return Result;
3242  } else if (Subtarget->isROPI() && IsRO) {
3243  // PC-relative.
3244  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3245  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3246  return Result;
3247  } else if (Subtarget->isRWPI() && !IsRO) {
3248  // SB-relative.
3249  SDValue RelAddr;
3250  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3251  ++NumMovwMovt;
3252  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3253  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3254  } else { // use literal pool for address constant
3255  ARMConstantPoolValue *CPV =
3256        ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3257      SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3258  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3259  RelAddr = DAG.getLoad(
3260          PtrVT, dl, DAG.getEntryNode(), CPAddr,
3261          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3262  }
3263  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3264  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3265  return Result;
3266  }
3267 
3268  // If we have T2 ops, we can materialize the address directly via movt/movw
3269  // pair. This is always cheaper.
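  // e.g.  movw r0, :lower16:sym
  //       movt r0, :upper16:sym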
3270  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3271  ++NumMovwMovt;
3272  // FIXME: Once remat is capable of dealing with instructions with register
3273  // operands, expand this into two nodes.
3274  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3275  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3276  } else {
3277  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3278  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3279  return DAG.getLoad(
3280        PtrVT, dl, DAG.getEntryNode(), CPAddr,
3281        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3282  }
3283 }
3284 
3285 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3286  SelectionDAG &DAG) const {
3287  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3288  "ROPI/RWPI not currently supported for Darwin");
3289  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3290  SDLoc dl(Op);
3291  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3292 
3293  if (Subtarget->useMovt(DAG.getMachineFunction()))
3294  ++NumMovwMovt;
3295 
3296  // FIXME: Once remat is capable of dealing with instructions with register
3297  // operands, expand this into multiple nodes
3298  unsigned Wrapper =
3299      isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3300
3301  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3302  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3303 
3304  if (Subtarget->isGVIndirectSymbol(GV))
3305  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3306                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3307   return Result;
3308 }
3309 
3310 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3311  SelectionDAG &DAG) const {
3312  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3313  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3314  "Windows on ARM expects to use movw/movt");
3315  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3316  "ROPI/RWPI not currently supported for Windows");
3317 
3318  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3319  const ARMII::TOF TargetFlags =
3320  (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3321  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3322  SDValue Result;
3323  SDLoc DL(Op);
3324 
3325  ++NumMovwMovt;
3326 
3327  // FIXME: Once remat is capable of dealing with instructions with register
3328  // operands, expand this into two nodes.
3329  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3330  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3331  TargetFlags));
3332  if (GV->hasDLLImportStorageClass())
3333  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3334  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3335  return Result;
3336 }
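// Illustrative note: on Windows, dllimport globals are reached through the
// import address table, so the movw/movt of the __imp_-prefixed symbol
// produced by MO_DLLIMPORT is followed by a load of the actual address;
// ordinary globals are materialized directly with movw/movt.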
3337 
3338 SDValue
3339 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3340  SDLoc dl(Op);
3341  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3342  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3343  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3344  Op.getOperand(1), Val);
3345 }
3346 
3347 SDValue
3348 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3349  SDLoc dl(Op);
3350  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3351  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3352 }
3353 
3354 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3355  SelectionDAG &DAG) const {
3356  SDLoc dl(Op);
3357  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3358  Op.getOperand(0));
3359 }
3360 
3361 SDValue
3362 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3363  const ARMSubtarget *Subtarget) const {
3364  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3365  SDLoc dl(Op);
3366  switch (IntNo) {
3367  default: return SDValue(); // Don't custom lower most intrinsics.
3368  case Intrinsic::thread_pointer: {
3369  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3370  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3371  }
3372  case Intrinsic::eh_sjlj_lsda: {
3373  MachineFunction &MF = DAG.getMachineFunction();
3374  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3375  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3376  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3377  SDValue CPAddr;
3378  bool IsPositionIndependent = isPositionIndependent();
3379  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3380  ARMConstantPoolValue *CPV =
3381  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3382  ARMCP::CPLSDA, PCAdj);
3383  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3384  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3385  SDValue Result = DAG.getLoad(
3386  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3387  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3388 
3389  if (IsPositionIndependent) {
3390  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3391  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3392  }
3393  return Result;
3394  }
3395  case Intrinsic::arm_neon_vabs:
3396  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3397  Op.getOperand(1));
3398  case Intrinsic::arm_neon_vmulls:
3399  case Intrinsic::arm_neon_vmullu: {
3400  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3401  ? ARMISD::VMULLs : ARMISD::VMULLu;
3402  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3403  Op.getOperand(1), Op.getOperand(2));
3404  }
3405  case Intrinsic::arm_neon_vminnm:
3406  case Intrinsic::arm_neon_vmaxnm: {
3407  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3408  ? ISD::FMINNUM : ISD::FMAXNUM;
3409  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3410  Op.getOperand(1), Op.getOperand(2));
3411  }
3412  case Intrinsic::arm_neon_vminu:
3413  case Intrinsic::arm_neon_vmaxu: {
3414  if (Op.getValueType().isFloatingPoint())
3415  return SDValue();
3416  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3417  ? ISD::UMIN : ISD::UMAX;
3418  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3419  Op.getOperand(1), Op.getOperand(2));
3420  }
3421  case Intrinsic::arm_neon_vmins:
3422  case Intrinsic::arm_neon_vmaxs: {
3423  // v{min,max}s is overloaded between signed integers and floats.
3424  if (!Op.getValueType().isFloatingPoint()) {
3425  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3426  ? ISD::SMIN : ISD::SMAX;
3427  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3428  Op.getOperand(1), Op.getOperand(2));
3429  }
3430  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3431  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3432  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3433  Op.getOperand(1), Op.getOperand(2));
3434  }
3435  case Intrinsic::arm_neon_vtbl1:
3436  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3437  Op.getOperand(1), Op.getOperand(2));
3438  case Intrinsic::arm_neon_vtbl2:
3439  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3440  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3441  }
3442 }
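// Illustrative note: mapping the overloaded NEON intrinsics above onto
// generic ISD opcodes (ABS, SMIN/SMAX, UMIN/UMAX, FMINNUM/FMAXNUM) lets
// later DAG combines and generic legalization apply to them; only operations
// with no generic equivalent (e.g. vtbl) keep ARM-specific node types.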
3443 
3444 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3445  const ARMSubtarget *Subtarget) {
3446  SDLoc dl(Op);
3447  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3448  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3449  if (SSID == SyncScope::SingleThread)
3450  return Op;
3451 
3452  if (!Subtarget->hasDataBarrier()) {
3453  // Some ARMv6 cpus can support data barriers with an mcr instruction.
3454  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3455  // here.
3456  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3457  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3458  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3459  DAG.getConstant(0, dl, MVT::i32));
3460  }
3461 
3462  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3463  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3464  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3465  if (Subtarget->isMClass()) {
3466  // Only a full system barrier exists in the M-class architectures.
3467  Domain = ARM_MB::SY;
3468  } else if (Subtarget->preferISHSTBarriers() &&
3469  Ord == AtomicOrdering::Release) {
3470  // Swift happens to implement ISHST barriers in a way that's compatible with
3471  // Release semantics but weaker than ISH so we'd be fools not to use
3472  // it. Beware: other processors probably don't!
3473  Domain = ARM_MB::ISHST;
3474  }
3475 
3476  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3477  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3478  DAG.getConstant(Domain, dl, MVT::i32));
3479 }
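// Illustrative note: the INTRINSIC_VOID node built above carries the barrier
// domain as an immediate, so a fence typically selects to "dmb ish" on
// A/R-class cores, "dmb sy" on M-class cores, and "dmb ishst" on cores such
// as Swift when only release ordering is required.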
3480 
3481 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3482  const ARMSubtarget *Subtarget) {
3483  // ARM pre v5TE and Thumb1 do not have preload instructions.
3484  if (!(Subtarget->isThumb2() ||
3485  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3486  // Just preserve the chain.
3487  return Op.getOperand(0);
3488 
3489  SDLoc dl(Op);
3490  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3491  if (!isRead &&
3492  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3493  // ARMv7 with MP extension has PLDW.
3494  return Op.getOperand(0);
3495 
3496  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3497  if (Subtarget->isThumb()) {
3498  // Invert the bits.
3499  isRead = ~isRead & 1;
3500  isData = ~isData & 1;
3501  }
3502 
3503  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3504  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3505  DAG.getConstant(isData, dl, MVT::i32));
3506 }
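// Illustrative note: the ARMISD::PRELOAD node built above selects to pld
// (data read), pldw (data write, which needs ARMv7 with the MP extension),
// or pli (instruction prefetch). On pre-v5TE and Thumb1-only targets the
// incoming chain is simply forwarded, because no preload instruction exists.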
3507 
3508 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3509  MachineFunction &MF = DAG.getMachineFunction();
3510  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3511 
3512  // vastart just stores the address of the VarArgsFrameIndex slot into the
3513  // memory location argument.
3514  SDLoc dl(Op);
3515  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3516  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3517  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3518  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3519  MachinePointerInfo(SV));
3520 }
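// Illustrative note: va_start therefore reduces to a single pointer-sized
// store of the VarArgsFrameIndex address into the caller-provided va_list
// object; reading the arguments back out is handled elsewhere.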
3521 
3522 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3523  CCValAssign &NextVA,
3524  SDValue &Root,
3525  SelectionDAG &DAG,
3526  const SDLoc &dl) const {
3527  MachineFunction &MF = DAG.getMachineFunction();
3528  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3529 
3530  const TargetRegisterClass *RC;
3531  if (AFI->isThumb1OnlyFunction())
3532  RC = &ARM::tGPRRegClass;
3533  else
3534  RC = &ARM::GPRRegClass;
3535 
3536  // Transform the arguments stored in physical registers into virtual ones.
3537  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3538  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3539 
3540  SDValue ArgValue2;
3541  if (NextVA.isMemLoc()) {
3542  MachineFrameInfo &MFI = MF.getFrameInfo();
3543  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3544 
3545  // Create load node to retrieve arguments from the stack.
3546  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3547  ArgValue2 = DAG.getLoad(
3548  MVT::i32, dl, Root, FIN,
3549  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3550  } else {
3551  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3552  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3553  }
3554  if (!Subtarget->isLittle())
3555  std::swap (ArgValue, ArgValue2);
3556  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3557 }
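// Illustrative note: ARMISD::VMOVDRR corresponds to "vmov Dd, Rlo, Rhi", so
// an f64 argument split by the calling convention across two GPRs (or a GPR
// and a stack slot) is reassembled into a single D register here; on
// big-endian targets the two halves are swapped first.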
3558 
3559 // The remaining GPRs hold either the beginning of variable-argument
3560 // data, or the beginning of an aggregate passed by value (usually
3561 // byval). Either way, we allocate stack slots adjacent to the data
3562 // provided by our caller, and store the unallocated registers there.
3563 // If this is a variadic function, the va_list pointer will begin with
3564 // these values; otherwise, this reassembles a (byval) structure that
3565 // was split between registers and memory.
3566 // Return: The frame index that the registers were stored into.
3567 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3568  const SDLoc &dl, SDValue &Chain,
3569  const Value *OrigArg,
3570  unsigned InRegsParamRecordIdx,
3571  int ArgOffset, unsigned ArgSize) const {
3572  // Currently, two use-cases are possible:
3573  // Case #1. Non-var-args function, and we meet the first byval parameter.
3574  // Set up the first unallocated register as the first byval register;
3575  // eat all remaining registers
3576  // (these two actions are performed by the HandleByVal method).
3577  // Then, here, we initialize the stack frame with
3578  // "store-reg" instructions.
3579  // Case #2. Var-args function that doesn't contain byval parameters.
3580  // The same: eat all remaining unallocated registers and
3581  // initialize the stack frame.
3582 
3583  MachineFunction &MF = DAG.getMachineFunction();
3584  MachineFrameInfo &MFI = MF.getFrameInfo();
3585  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3586  unsigned RBegin, REnd;
3587  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3588  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3589  } else {
3590  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3591  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3592  REnd = ARM::R4;
3593  }
3594 
3595  if (REnd != RBegin)
3596  ArgOffset = -4 * (ARM::R4 - RBegin);
3597 
3598  auto PtrVT = getPointerTy(DAG.getDataLayout());
3599  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3600  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3601 
3602  SmallVector<SDValue, 4> MemOps;
3603  const TargetRegisterClass *RC =
3604  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3605 
3606  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3607  unsigned VReg = MF.addLiveIn(Reg, RC);
3608  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3609  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3610  MachinePointerInfo(OrigArg, 4 * i));
3611  MemOps.push_back(Store);
3612  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3613  }
3614 
3615  if (!MemOps.empty())
3616  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3617  return FrameIndex;
3618 }
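// Illustrative example: for a byval aggregate whose first bytes were assigned
// to r2 and r3, RBegin is r2 and REnd is r4, so two 4-byte stores are emitted
// at offsets -8 and -4 relative to the incoming stack arguments, making the
// aggregate contiguous with the part the caller already placed on the stack.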
3619 
3620 // Set up the stack frame that the va_list pointer will start from.
3621 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3622  const SDLoc &dl, SDValue &Chain,
3623  unsigned ArgOffset,
3624  unsigned TotalArgRegsSaveSize,
3625  bool ForceMutable) const {
3626  MachineFunction &MF = DAG.getMachineFunction();
3627  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3628 
3629  // Try to store any remaining integer argument regs
3630  // to their spots on the stack so that they may be loaded by dereferencing
3631  // the result of va_next.
3632  // If there are no regs to be stored, just point the address after the last
3633  // argument passed via the stack.
3634  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3635  CCInfo.getInRegsParamsCount(),
3636  CCInfo.getNextStackOffset(), 4);
3637  AFI->setVarArgsFrameIndex(FrameIndex);
3638 }
3639 
3640 SDValue ARMTargetLowering::LowerFormalArguments(
3641  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3642  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3643  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3644  MachineFunction &MF = DAG.getMachineFunction();
3645  MachineFrameInfo &MFI = MF.getFrameInfo();
3646 
3647  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3648 
3649  // Assign locations to all of the incoming arguments.
3650  SmallVector<CCValAssign, 16> ArgLocs;
3651  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3652  *DAG.getContext());
3653  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3654 
3655  SmallVector<SDValue, 16> ArgValues;
3656  SDValue ArgValue;
3657  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3658  unsigned CurArgIdx = 0;
3659 
3660  // Initially ArgRegsSaveSize is zero.
3661  // Then we increase this value each time we meet byval parameter.
3662  // We also increase this value in case of varargs function.
3663  AFI->setArgRegsSaveSize(0);
3664 
3665  // Calculate the amount of stack space that we need to allocate to store
3666  // byval and variadic arguments that are passed in registers.
3667  // We need to know this before we allocate the first byval or variadic
3668  // argument, as they will be allocated a stack slot below the CFA (Canonical
3669  // Frame Address, the stack pointer at entry to the function).
3670  unsigned ArgRegBegin = ARM::R4;
3671  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3672  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3673  break;
3674 
3675  CCValAssign &VA = ArgLocs[i];
3676  unsigned Index = VA.getValNo();
3677  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3678  if (!Flags.isByVal())
3679  continue;
3680 
3681  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3682  unsigned RBegin, REnd;
3683  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3684  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3685 
3686  CCInfo.nextInRegsParam();
3687  }
3688  CCInfo.rewindByValRegsInfo();
3689 
3690  int lastInsIndex = -1;
3691  if (isVarArg && MFI.hasVAStart()) {
3692  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3693  if (RegIdx != array_lengthof(GPRArgRegs))
3694  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3695  }
3696 
3697  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3698  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3699  auto PtrVT = getPointerTy(DAG.getDataLayout());
3700 
3701  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3702  CCValAssign &VA = ArgLocs[i];
3703  if (Ins[VA.getValNo()].isOrigArg()) {
3704  std::advance(CurOrigArg,
3705  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3706  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3707  }
3708  // Arguments stored in registers.
3709  if (VA.isRegLoc()) {
3710  EVT RegVT = VA.getLocVT();
3711 
3712  if (VA.needsCustom()) {
3713  // f64 and vector types are split up into multiple registers or
3714  // combinations of registers and stack slots.
3715  if (VA.getLocVT() == MVT::v2f64) {
3716  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3717  Chain, DAG, dl);
3718  VA = ArgLocs[++i]; // skip ahead to next loc
3719  SDValue ArgValue2;
3720  if (VA.isMemLoc()) {
3721  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3722  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3723  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3724  MachinePointerInfo::getFixedStack(
3725  DAG.getMachineFunction(), FI));
3726  } else {
3727  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3728  Chain, DAG, dl);
3729  }
3730  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3731  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3732  ArgValue, ArgValue1,
3733  DAG.getIntPtrConstant(0, dl));
3734  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3735  ArgValue, ArgValue2,
3736  DAG.getIntPtrConstant(1, dl));
3737  } else
3738  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3739  } else {
3740  const TargetRegisterClass *RC;
3741 
3742 
3743  if (RegVT == MVT::f16)
3744  RC = &ARM::HPRRegClass;
3745  else if (RegVT == MVT::f32)
3746  RC = &ARM::SPRRegClass;
3747  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3748  RC = &ARM::DPRRegClass;
3749  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3750  RC = &ARM::QPRRegClass;
3751  else if (RegVT == MVT::i32)
3752  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3753  : &ARM::GPRRegClass;
3754  else
3755  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3756 
3757  // Transform the arguments in physical registers into virtual ones.
3758  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3759  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3760  }
3761 
3762  // If this is an 8 or 16-bit value, it is really passed promoted
3763  // to 32 bits. Insert an assert[sz]ext to capture this, then
3764  // truncate to the right size.
3765  switch (VA.getLocInfo()) {
3766  default: llvm_unreachable("Unknown loc info!");
3767  case CCValAssign::Full: break;
3768  case CCValAssign::BCvt:
3769  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3770  break;
3771  case CCValAssign::SExt:
3772  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3773  DAG.getValueType(VA.getValVT()));
3774  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3775  break;
3776  case CCValAssign::ZExt:
3777  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3778  DAG.getValueType(VA.getValVT()));
3779  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3780  break;
3781  }
3782 
3783  InVals.push_back(ArgValue);
3784  } else { // VA.isRegLoc()
3785  // sanity check
3786  assert(VA.isMemLoc());
3787  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3788 
3789  int index = VA.getValNo();
3790 
3791  // Some Ins[] entries become multiple ArgLoc[] entries.
3792  // Process them only once.
3793  if (index != lastInsIndex)
3794  {
3795  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3796  // FIXME: For now, all byval parameter objects are marked mutable.
3797  // This can be changed with more analysis.
3798  // In case of tail call optimization, mark all arguments mutable,
3799  // since they could be overwritten by the lowering of arguments in case of
3800  // a tail call.
3801  if (Flags.isByVal()) {
3802  assert(Ins[index].isOrigArg() &&
3803  "Byval arguments cannot be implicit");
3804  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3805 
3806  int FrameIndex = StoreByValRegs(
3807  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3808  VA.getLocMemOffset(), Flags.getByValSize());
3809  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3810  CCInfo.nextInRegsParam();
3811  } else {
3812  unsigned FIOffset = VA.getLocMemOffset();
3813  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3814  FIOffset, true);
3815 
3816  // Create load nodes to retrieve arguments from the stack.
3817  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3818  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3819  MachinePointerInfo::getFixedStack(
3820  DAG.getMachineFunction(), FI)));
3821  }
3822  lastInsIndex = index;
3823  }
3824  }
3825  }
3826 
3827  // varargs
3828  if (isVarArg && MFI.hasVAStart())
3829  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3830  CCInfo.getNextStackOffset(),
3831  TotalArgRegsSaveSize);
3832 
3832 
3833  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3834 
3835  return Chain;
3836 }
3837 
3838 /// isFloatingPointZero - Return true if this is +0.0.
3839 static bool isFloatingPointZero(SDValue Op) {
3840  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3841  return CFP->getValueAPF().isPosZero();
3842  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3843  // Maybe this has already been legalized into the constant pool?
3844  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3845  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3846  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3847  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3848  return CFP->getValueAPF().isPosZero();
3849  }
3850  } else if (Op->getOpcode() == ISD::BITCAST &&
3851  Op->getValueType(0) == MVT::f64) {
3852  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3853  // created by LowerConstantFP().
3854  SDValue BitcastOp = Op->getOperand(0);
3855  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3856  isNullConstant(BitcastOp->getOperand(0)))
3857  return true;
3858  }
3859  return false;
3860 }
3861 
3862 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition code
3863 /// for the given operands.
3864 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3865  SDValue &ARMcc, SelectionDAG &DAG,
3866  const SDLoc &dl) const {
3867  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3868  unsigned C = RHSC->getZExtValue();
3869  if (!isLegalICmpImmediate((int32_t)C)) {
3870  // Constant does not fit, try adjusting it by one.
3871  switch (CC) {
3872  default: break;
3873  case ISD::SETLT:
3874  case ISD::SETGE:
3875  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3876  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3877  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3878  }
3879  break;
3880  case ISD::SETULT:
3881  case ISD::SETUGE:
3882  if (C != 0 && isLegalICmpImmediate(C-1)) {
3883  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3884  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3885  }
3886  break;
3887  case ISD::SETLE:
3888  case ISD::SETGT:
3889  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3890  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3891  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3892  }
3893  break;
3894  case ISD::SETULE:
3895  case ISD::SETUGT:
3896  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3897  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3898  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3899  }
3900  break;
3901  }
3902  }
3903  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3904  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3905  // In ARM and Thumb-2, the compare instructions can shift their second
3906  // operand.
3907  CC = ISD::getSetCCSwappedOperands(CC);
3908  std::swap(LHS, RHS);
3909  }
3910 
3911  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3912  ARMISD::NodeType CompareType;
3913  switch (CondCode) {
3914  default:
3915  CompareType = ARMISD::CMP;
3916  break;
3917  case ARMCC::EQ:
3918  case ARMCC::NE:
3919  // Uses only Z Flag
3920  CompareType = ARMISD::CMPZ;
3921  break;
3922  }
3923  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3924  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3925 }
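// Illustrative example: for "x < 257" the immediate 257 is not a legal ARM
// modified immediate, but 256 is, so the adjustment above rewrites the test
// as "x <= 256" (SETLT becomes SETLE with C-1) and a single cmp instruction
// suffices instead of materializing the constant in a register first.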
3926 
3927 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3928 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3929  SelectionDAG &DAG, const SDLoc &dl,
3930  bool InvalidOnQNaN) const {
3931  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3932  SDValue Cmp;
3933  SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3934  if (!isFloatingPointZero(RHS))
3935  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3936  else
3937  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3938  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3939 }
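// Illustrative note: the CMPFP/CMPFPw0 plus FMSTAT pair above corresponds to
// vcmp.f32/.f64 (or vcmpe when InvalidOnQNaN is set) followed by
// "vmrs APSR_nzcv, fpscr", which copies the VFP comparison flags into the
// core flags so ordinary conditional instructions can consume them.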
3940 
3941 /// duplicateCmp - Glue values can have only one use, so this function
3942 /// duplicates a comparison node.
3943 SDValue
3944 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3945  unsigned Opc = Cmp.getOpcode();
3946  SDLoc DL(Cmp);
3947  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3948  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3949 
3950  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3951  Cmp = Cmp.getOperand(0);
3952  Opc = Cmp.getOpcode();
3953  if (Opc == ARMISD::CMPFP)
3954  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3955  Cmp.getOperand(1), Cmp.getOperand(2));
3956  else {
3957  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3958  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3959  Cmp.getOperand(1));
3960  }
3961  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3962 }
3963 
3964 // This function returns three things: the arithmetic computation itself
3965 // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
3966 // comparison and the condition code define the case in which the arithmetic
3967 // computation *does not* overflow.
3968 std::pair<SDValue, SDValue>
3969 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3970  SDValue &ARMcc) const {
3971  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3972 
3973  SDValue Value, OverflowCmp;
3974  SDValue LHS = Op.getOperand(0);
3975  SDValue RHS = Op.getOperand(1);
3976  SDLoc dl(Op);
3977 
3978  // FIXME: We are currently always generating CMPs because we don't support
3979  // generating CMN through the backend. This is not as good as the natural
3980  // CMP case because it causes a register dependency and cannot be folded
3981  // later.
3982 
3983  switch (Op.getOpcode()) {
3984  default:
3985  llvm_unreachable("Unknown overflow instruction!");
3986  case ISD::SADDO:
3987  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3988  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3989  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3990  break;
3991  case ISD::UADDO:
3992  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3993  // We use ADDC here to correspond to its use in LowerUnsignedALUO.
3994  // We do not use it in the USUBO case as Value may not be used.
3995  Value = DAG.getNode(ARMISD::ADDC, dl,
3996  DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
3997  .getValue(0);
3998  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3999  break;
4000  case ISD::SSUBO:
4001  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4002  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4003  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4004  break;
4005  case ISD::USUBO:
4006  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4007  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4008  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4009  break;
4010  case ISD::UMULO:
4011  // We generate a UMUL_LOHI and then check if the high word is 0.
4012  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4013  Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4014  DAG.getVTList(Op.getValueType(), Op.getValueType()),
4015  LHS, RHS);
4016  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4017  DAG.getConstant(0, dl, MVT::i32));
4018  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4019  break;
4020  case ISD::SMULO:
4021  // We generate a SMUL_LOHI and then check if all the bits of the high word
4022  // are the same as the sign bit of the low word.
4023  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4024  Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4025  DAG.getVTList(Op.getValueType(), Op.getValueType()),
4026  LHS, RHS);
4027  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4028  DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4029  Value.getValue(0),
4030  DAG.getConstant(31, dl, MVT::i32)));
4031  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4032  break;
4033  } // switch (...)
4034 
4035  return std::make_pair(Value, OverflowCmp);
4036 }
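// Illustrative example: for a 32-bit SADDO, Value is the plain ADD and the
// overflow check compares the sum back against LHS with the VC (no signed
// overflow) condition; because CMN is not generated here (see the FIXME
// above), the comparison re-reads the sum instead of folding the RHS into a
// cmn instruction.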
4037 
4038 SDValue
4039 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4040  // Let legalize expand this if it isn't a legal type yet.
4041  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4042  return SDValue();
4043 
4044  SDValue Value, OverflowCmp;
4045  SDValue ARMcc;
4046  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG