ARMISelLowering.cpp (LLVM 8.0.0svn)
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that ARM uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARMISelLowering.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMMachineFunctionInfo.h"
21 #include "ARMPerfectShuffle.h"
22 #include "ARMRegisterInfo.h"
23 #include "ARMSelectionDAGInfo.h"
24 #include "ARMSubtarget.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
65 #include "llvm/IR/Attributes.h"
66 #include "llvm/IR/CallingConv.h"
67 #include "llvm/IR/Constant.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DataLayout.h"
70 #include "llvm/IR/DebugLoc.h"
71 #include "llvm/IR/DerivedTypes.h"
72 #include "llvm/IR/Function.h"
73 #include "llvm/IR/GlobalAlias.h"
74 #include "llvm/IR/GlobalValue.h"
75 #include "llvm/IR/GlobalVariable.h"
76 #include "llvm/IR/IRBuilder.h"
77 #include "llvm/IR/InlineAsm.h"
78 #include "llvm/IR/Instruction.h"
79 #include "llvm/IR/Instructions.h"
80 #include "llvm/IR/IntrinsicInst.h"
81 #include "llvm/IR/Intrinsics.h"
82 #include "llvm/IR/Module.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/IR/Value.h"
86 #include "llvm/MC/MCInstrDesc.h"
88 #include "llvm/MC/MCRegisterInfo.h"
89 #include "llvm/MC/MCSchedule.h"
92 #include "llvm/Support/Casting.h"
93 #include "llvm/Support/CodeGen.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 
117 #define DEBUG_TYPE "arm-isel"
118 
119 STATISTIC(NumTailCalls, "Number of tail calls");
120 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
121 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
122 STATISTIC(NumConstpoolPromoted,
123  "Number of constants with their storage promoted into constant pools");
124 
125 static cl::opt<bool>
126 ARMInterworking("arm-interworking", cl::Hidden,
127  cl::desc("Enable / disable ARM interworking (for debugging only)"),
128  cl::init(true));
129 
131  "arm-promote-constant", cl::Hidden,
132  cl::desc("Enable / disable promotion of unnamed_addr constants into "
133  "constant pools"),
134  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
136  "arm-promote-constant-max-size", cl::Hidden,
137  cl::desc("Maximum size of constant to promote into a constant pool"),
138  cl::init(64));
140  "arm-promote-constant-max-total", cl::Hidden,
141  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
142  cl::init(128));
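// Note (illustrative): these are hidden cl::opt flags, so the constant-pool
// promotion heuristics can be tuned from the llc command line using the
// option names declared above, e.g.
//   llc -mtriple=armv7a-none-eabi -arm-promote-constant \
//       -arm-promote-constant-max-size=32 -arm-promote-constant-max-total=64 in.ll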
143 
144 // The APCS parameter registers.
145 static const MCPhysReg GPRArgRegs[] = {
146  ARM::R0, ARM::R1, ARM::R2, ARM::R3
147 };
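// Illustrative note: under both APCS and AAPCS the first four 32-bit
// arguments travel in r0-r3 and the rest go on the stack, so for
//   int f(int a, int b, int c, int d, int e);
// 'a'..'d' arrive in r0..r3 and 'e' arrives in the first stack slot.
// (AAPCS additionally requires 64-bit arguments to start in an even
// register, i.e. r0 or r2.)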
148 
149 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
150  MVT PromotedBitwiseVT) {
151  if (VT != PromotedLdStVT) {
153  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
154 
156  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
157  }
158 
159  MVT ElemTy = VT.getVectorElementType();
160  if (ElemTy != MVT::f64)
164  if (ElemTy == MVT::i32) {
169  } else {
174  }
183  if (VT.isInteger()) {
187  }
188 
189  // Promote all bit-wise operations.
190  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
197  }
198 
199  // Neon does not support vector divide/remainder operations.
206 
207  if (!VT.isFloatingPoint() &&
208  VT != MVT::v2i64 && VT != MVT::v1i64)
209  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210  setOperationAction(Opcode, VT, Legal);
211 }
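// Note: AND/OR/XOR only look at the bit pattern, so the integer vector types
// of a given width can share one set of patterns; promoting them to the
// PromotedBitwiseVT passed in here (v2i32 for 64-bit D types, v4i32 for
// 128-bit Q types, see the callers below) avoids duplicating those patterns
// for v8i8, v4i16, and so on.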
212 
213 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
214  addRegisterClass(VT, &ARM::DPRRegClass);
215  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
216 }
217 
218 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
219  addRegisterClass(VT, &ARM::DPairRegClass);
220  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
221 }
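// Illustrative note: the DR variant places 64-bit vector types in single
// D registers, while the QR variant uses the DPair register class, i.e. two
// consecutive D registers forming a 128-bit Q register (for example
// <4 x i32> lives in one Q register).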
222 
224  const ARMSubtarget &STI)
225  : TargetLowering(TM), Subtarget(&STI) {
226  RegInfo = Subtarget->getRegisterInfo();
227  Itins = Subtarget->getInstrItineraryData();
228 
231 
232  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
233  !Subtarget->isTargetWatchOS()) {
234  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
235  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
236  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
237  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
239  }
240 
241  if (Subtarget->isTargetMachO()) {
242  // Uses VFP for Thumb libfuncs if available.
243  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
244  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
245  static const struct {
246  const RTLIB::Libcall Op;
247  const char * const Name;
248  const ISD::CondCode Cond;
249  } LibraryCalls[] = {
250  // Single-precision floating-point arithmetic.
251  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
252  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
253  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
254  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
255 
256  // Double-precision floating-point arithmetic.
257  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
258  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
259  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
260  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
261 
262  // Single-precision comparisons.
263  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
264  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
265  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
266  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
267  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
268  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
269  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
270  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
271 
272  // Double-precision comparisons.
273  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
274  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
275  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
276  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
277  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
278  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
279  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
280  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
281 
282  // Floating-point to integer conversions.
283  // i64 conversions are done via library routines even when generating VFP
284  // instructions, so use the same ones.
285  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
286  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
287  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
288  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
289 
290  // Conversions between floating types.
291  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
292  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
293 
294  // Integer to floating-point conversions.
295  // i64 conversions are done via library routines even when generating VFP
296  // instructions, so use the same ones.
297  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
298  // e.g., __floatunsidf vs. __floatunssidfvfp.
299  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
300  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
301  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
302  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
303  };
304 
305  for (const auto &LC : LibraryCalls) {
306  setLibcallName(LC.Op, LC.Name);
307  if (LC.Cond != ISD::SETCC_INVALID)
308  setCmpLibcallCC(LC.Op, LC.Cond);
309  }
310  }
311  }
312 
313  // These libcalls are not available in 32-bit.
314  setLibcallName(RTLIB::SHL_I128, nullptr);
315  setLibcallName(RTLIB::SRL_I128, nullptr);
316  setLibcallName(RTLIB::SRA_I128, nullptr);
317 
318  // RTLIB
319  if (Subtarget->isAAPCS_ABI() &&
320  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
321  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
322  static const struct {
323  const RTLIB::Libcall Op;
324  const char * const Name;
325  const CallingConv::ID CC;
326  const ISD::CondCode Cond;
327  } LibraryCalls[] = {
328  // Double-precision floating-point arithmetic helper functions
329  // RTABI chapter 4.1.2, Table 2
330  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
331  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
332  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
333  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
334 
335  // Double-precision floating-point comparison helper functions
336  // RTABI chapter 4.1.2, Table 3
337  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
338  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
339  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
340  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
341  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
342  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
343  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
344  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
345 
346  // Single-precision floating-point arithmetic helper functions
347  // RTABI chapter 4.1.2, Table 4
348  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
349  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
350  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
351  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
352 
353  // Single-precision floating-point comparison helper functions
354  // RTABI chapter 4.1.2, Table 5
355  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
356  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
357  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
358  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
359  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
360  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
361  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
362  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
363 
364  // Floating-point to integer conversions.
365  // RTABI chapter 4.1.2, Table 6
366  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
367  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
368  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
369  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
370  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
371  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
372  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
373  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
374 
375  // Conversions between floating types.
376  // RTABI chapter 4.1.2, Table 7
377  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 
381  // Integer to floating-point conversions.
382  // RTABI chapter 4.1.2, Table 8
383  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
384  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
390  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
391 
392  // Long long helper functions
393  // RTABI chapter 4.2, Table 9
394  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 
399  // Integer division functions
400  // RTABI chapter 4.3.1
401  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
402  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
408  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
409  };
410 
411  for (const auto &LC : LibraryCalls) {
412  setLibcallName(LC.Op, LC.Name);
413  setLibcallCallingConv(LC.Op, LC.CC);
414  if (LC.Cond != ISD::SETCC_INVALID)
415  setCmpLibcallCC(LC.Op, LC.Cond);
416  }
417 
418  // EABI dependent RTLIB
419  if (TM.Options.EABIVersion == EABI::EABI4 ||
421  static const struct {
422  const RTLIB::Libcall Op;
423  const char *const Name;
424  const CallingConv::ID CC;
425  const ISD::CondCode Cond;
426  } MemOpsLibraryCalls[] = {
427  // Memory operations
428  // RTABI chapter 4.3.4
430  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
431  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
432  };
433 
434  for (const auto &LC : MemOpsLibraryCalls) {
435  setLibcallName(LC.Op, LC.Name);
436  setLibcallCallingConv(LC.Op, LC.CC);
437  if (LC.Cond != ISD::SETCC_INVALID)
438  setCmpLibcallCC(LC.Op, LC.Cond);
439  }
440  }
441  }
442 
443  if (Subtarget->isTargetWindows()) {
444  static const struct {
445  const RTLIB::Libcall Op;
446  const char * const Name;
447  const CallingConv::ID CC;
448  } LibraryCalls[] = {
449  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
450  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
451  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
452  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
453  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
454  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
455  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
456  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
457  };
458 
459  for (const auto &LC : LibraryCalls) {
460  setLibcallName(LC.Op, LC.Name);
461  setLibcallCallingConv(LC.Op, LC.CC);
462  }
463  }
464 
465  // Use divmod compiler-rt calls for iOS 5.0 and later.
466  if (Subtarget->isTargetMachO() &&
467  !(Subtarget->isTargetIOS() &&
468  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
469  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
470  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
471  }
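// For reference, a sketch of the usual compiler-rt prototypes (assumption;
// see compiler-rt for the authoritative declarations): the divmod helpers
// return the quotient and hand the remainder back through a pointer,
//   int __divmodsi4(int a, int b, int *rem);
//   unsigned __udivmodsi4(unsigned a, unsigned b, unsigned *rem);
// which lets an {S,U}DIVREM node be lowered to a single libcall.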
472 
473  // The half <-> float conversion functions are always soft-float on
474  // non-watchos platforms, but are needed for some targets which use a
475  // hard-float calling convention by default.
476  if (!Subtarget->isTargetWatchABI()) {
477  if (Subtarget->isAAPCS_ABI()) {
478  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
479  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
480  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
481  } else {
482  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
483  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
484  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
485  }
486  }
487 
488  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
489  // a __gnu_ prefix (which is the default).
490  if (Subtarget->isTargetAEABI()) {
491  static const struct {
492  const RTLIB::Libcall Op;
493  const char * const Name;
494  const CallingConv::ID CC;
495  } LibraryCalls[] = {
496  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
497  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
498  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
499  };
500 
501  for (const auto &LC : LibraryCalls) {
502  setLibcallName(LC.Op, LC.Name);
503  setLibcallCallingConv(LC.Op, LC.CC);
504  }
505  }
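// Illustrative sketch of the AEABI half-conversion helpers (see the ARM
// RTABI for the authoritative prototypes): __aeabi_f2h / __aeabi_d2h return
// the IEEE binary16 encoding in the low 16 bits of r0, and __aeabi_h2f
// takes that encoding and returns a float, roughly
//   unsigned short __aeabi_f2h(float);
//   float __aeabi_h2f(unsigned short);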
506 
507  if (Subtarget->isThumb1Only())
508  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
509  else
510  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
511 
512  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
513  !Subtarget->isThumb1Only()) {
514  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
515  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
516  }
517 
518  if (Subtarget->hasFullFP16()) {
519  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
523 
526  }
527 
528  for (MVT VT : MVT::vector_valuetypes()) {
529  for (MVT InnerVT : MVT::vector_valuetypes()) {
530  setTruncStoreAction(VT, InnerVT, Expand);
531  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
532  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
533  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
534  }
535 
540 
542  }
543 
546 
549 
550  if (Subtarget->hasNEON()) {
551  addDRTypeForNEON(MVT::v2f32);
552  addDRTypeForNEON(MVT::v8i8);
553  addDRTypeForNEON(MVT::v4i16);
554  addDRTypeForNEON(MVT::v2i32);
555  addDRTypeForNEON(MVT::v1i64);
556 
557  addQRTypeForNEON(MVT::v4f32);
558  addQRTypeForNEON(MVT::v2f64);
559  addQRTypeForNEON(MVT::v16i8);
560  addQRTypeForNEON(MVT::v8i16);
561  addQRTypeForNEON(MVT::v4i32);
562  addQRTypeForNEON(MVT::v2i64);
563 
564  if (Subtarget->hasFullFP16()) {
565  addQRTypeForNEON(MVT::v8f16);
566  addDRTypeForNEON(MVT::v4f16);
567  }
568 
569  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
570  // neither Neon nor VFP support any arithmetic operations on it.
571  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
572  // supported for v4f32.
576  // FIXME: Code duplication: FDIV and FREM are always expanded, see
577  // ARMTargetLowering::addTypeForNEON method for details.
580  // FIXME: Create unittest.
581  // In other words, find a case where "copysign" appears in the DAG with
582  // vector operands.
584  // FIXME: Code duplication: SETCC has custom operation action, see
585  // ARMTargetLowering::addTypeForNEON method for details.
587  // FIXME: Create unittest for FNEG and for FABS.
599  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
606 
621 
622  // Mark v2f32 intrinsics.
637 
638  // Neon does not support some operations on v1i64 and v2i64 types.
640  // Custom handling for some quad-vector types to detect VMULL.
644  // Custom handling for some vector types to avoid expensive expansions
649  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
650  // a destination type that is wider than the source, nor does it have a
651  // FP_TO_[SU]INT instruction with a narrower destination than the
652  // source.
661 
664 
665  // NEON does not have a single-instruction CTPOP for vectors with element
666  // types wider than 8 bits. However, custom lowering can leverage the
667  // v8i8/v16i8 vcnt instruction.
674 
677 
678  // NEON does not have a single-instruction CTTZ for vectors.
683 
688 
693 
698 
699  // NEON only has FMA instructions as of VFP4.
700  if (!Subtarget->hasVFP4()) {
703  }
704 
722 
723  // It is legal to extload from v4i8 to v4i16 or v4i32.
725  MVT::v2i32}) {
726  for (MVT VT : MVT::integer_vector_valuetypes()) {
730  }
731  }
732  }
733 
734  if (Subtarget->isFPOnlySP()) {
735  // When targeting a floating-point unit with only single-precision
736  // operations, f64 is legal for the few double-precision instructions which
737  // are present. However, no double-precision operations other than moves,
738  // loads and stores are provided by the hardware.
771  }
772 
774 
775  // ARM does not have floating-point extending loads.
776  for (MVT VT : MVT::fp_valuetypes()) {
779  }
780 
781  // ... or truncating stores
785 
786  // ARM does not have i1 sign extending load.
787  for (MVT VT : MVT::integer_valuetypes())
789 
790  // ARM supports all 4 flavors of integer indexed load / store.
791  if (!Subtarget->isThumb1Only()) {
792  for (unsigned im = (unsigned)ISD::PRE_INC;
802  }
803  } else {
804  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
807  }
808 
813 
816 
817  // i64 operation support.
820  if (Subtarget->isThumb1Only()) {
823  }
824  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
825  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
827 
834 
835  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
836  if (Subtarget->isThumb1Only()) {
840  }
841 
842  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
844 
845  // ARM does not have ROTL.
847  for (MVT VT : MVT::vector_valuetypes()) {
850  }
853  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
856  }
857 
858  // @llvm.readcyclecounter requires the Performance Monitors extension.
859  // Default to the 0 expansion on unsupported platforms.
860  // FIXME: Technically there are older ARM CPUs that have
861  // implementation-specific ways of obtaining this information.
862  if (Subtarget->hasPerfMon())
864 
865  // Only ARMv6 has BSWAP.
866  if (!Subtarget->hasV6Ops())
868 
869  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
870  : Subtarget->hasDivideInARMMode();
871  if (!hasDivide) {
872  // These are expanded into libcalls if the CPU doesn't have a HW divider.
875  }
876 
877  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
880 
883  }
884 
887 
888  // Register based DivRem for AEABI (RTABI 4.2)
889  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
890  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
891  Subtarget->isTargetWindows()) {
894  HasStandaloneRem = false;
895 
896  if (Subtarget->isTargetWindows()) {
897  const struct {
898  const RTLIB::Libcall Op;
899  const char * const Name;
900  const CallingConv::ID CC;
901  } LibraryCalls[] = {
902  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
903  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
904  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
905  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
906 
907  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
908  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
909  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
910  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
911  };
912 
913  for (const auto &LC : LibraryCalls) {
914  setLibcallName(LC.Op, LC.Name);
915  setLibcallCallingConv(LC.Op, LC.CC);
916  }
917  } else {
918  const struct {
919  const RTLIB::Libcall Op;
920  const char * const Name;
921  const CallingConv::ID CC;
922  } LibraryCalls[] = {
923  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
924  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
925  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
926  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
927 
928  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
929  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
930  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
931  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
932  };
933 
934  for (const auto &LC : LibraryCalls) {
935  setLibcallName(LC.Op, LC.Name);
936  setLibcallCallingConv(LC.Op, LC.CC);
937  }
938  }
939 
944  } else {
947  }
948 
949  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
950  for (auto &VT : {MVT::f32, MVT::f64})
952 
957 
959 
960  // Use the default implementation.
967 
968  if (Subtarget->isTargetWindows())
970  else
972 
973  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
974  // the default expansion.
975  InsertFencesForAtomic = false;
976  if (Subtarget->hasAnyDataBarrier() &&
977  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
978  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
979  // to ldrex/strex loops already.
981  if (!Subtarget->isThumb() || !Subtarget->isMClass())
983 
984  // On v8, we have particularly efficient implementations of atomic fences
985  // if they can be combined with nearby atomic loads and stores.
986  if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
987  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
988  InsertFencesForAtomic = true;
989  }
990  } else {
991  // If there's anything we can use as a barrier, go through custom lowering
992  // for ATOMIC_FENCE.
993  // If the target has DMB in Thumb mode, fences can be inserted.
994  if (Subtarget->hasDataBarrier())
995  InsertFencesForAtomic = true;
996 
998  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
999 
1000  // Set them all for expansion, which will force libcalls.
1013  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1014  // Unordered/Monotonic case.
1015  if (!InsertFencesForAtomic) {
1018  }
1019  }
1020 
1022 
1023  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1024  if (!Subtarget->hasV6Ops()) {
1027  }
1029 
1030  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1031  !Subtarget->isThumb1Only()) {
1032  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1033  // iff target supports vfp2.
1036  }
1037 
1038  // We want to custom lower some of our intrinsics.
1043  if (Subtarget->useSjLjEH())
1044  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1045 
1055  if (Subtarget->hasFullFP16()) {
1059  }
1060 
1062 
1065  if (Subtarget->hasFullFP16())
1070 
1071  // We don't support sin/cos/fmod/copysign/pow
1080  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1081  !Subtarget->isThumb1Only()) {
1084  }
1087 
1088  if (!Subtarget->hasVFP4()) {
1091  }
1092 
1093  // Various VFP goodness
1094  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1095  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1096  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1099  }
1100 
1101  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1102  if (!Subtarget->hasFP16()) {
1105  }
1106  }
1107 
1108  // Use __sincos_stret if available.
1109  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1110  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1113  }
1114 
1115  // FP-ARMv8 implements a lot of rounding-like FP operations.
1116  if (Subtarget->hasFPARMv8()) {
1129 
1130  if (!Subtarget->isFPOnlySP()) {
1139  }
1140  }
1141 
1142  if (Subtarget->hasNEON()) {
1143  // vmin and vmax aren't available in a scalar form, so we use
1144  // a NEON instruction with an undef lane instead.
1153 
1154  if (Subtarget->hasFullFP16()) {
1159 
1164  }
1165  }
1166 
1167  // We have target-specific dag combine patterns for the following nodes:
1168  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1175 
1176  if (Subtarget->hasV6Ops())
1178 
1180 
1181  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1182  !Subtarget->hasVFP2())
1184  else
1186 
1187  //// temporary - rewrite interface to use type
1188  MaxStoresPerMemset = 8;
1190  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1192  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1194 
1195  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1196  // are at least 4 bytes aligned.
1198 
1199  // Prefer likely predicted branches to selects on out-of-order cores.
1200  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1201 
1203 
1204  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1205 }
1206 
1208  return Subtarget->useSoftFloat();
1209 }
1210 
1211 // FIXME: It might make sense to define the representative register class as the
1212 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1213 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1214 // SPR's representative would be DPR_VFP2. This should work well if register
1215 // pressure tracking were modified such that a register use would increment the
1216 // pressure of the register class's representative and all of its super
1217 // classes' representatives transitively. We have not implemented this because
1218 // of the difficulty prior to coalescing of modeling operand register classes
1219 // due to the common occurrence of cross-class copies and subregister insertions
1220 // and extractions.
1221 std::pair<const TargetRegisterClass *, uint8_t>
1223  MVT VT) const {
1224  const TargetRegisterClass *RRC = nullptr;
1225  uint8_t Cost = 1;
1226  switch (VT.SimpleTy) {
1227  default:
1229  // Use DPR as representative register class for all floating point
1230  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1231  // the cost is 1 for both f32 and f64.
1232  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1233  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1234  RRC = &ARM::DPRRegClass;
1235  // When NEON is used for SP, only half of the register file is available
1236  // because operations that define both SP and DP results will be constrained
1237  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1238  // coalescing by double-counting the SP regs. See the FIXME above.
1239  if (Subtarget->useNEONForSinglePrecisionFP())
1240  Cost = 2;
1241  break;
1242  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1243  case MVT::v4f32: case MVT::v2f64:
1244  RRC = &ARM::DPRRegClass;
1245  Cost = 2;
1246  break;
1247  case MVT::v4i64:
1248  RRC = &ARM::DPRRegClass;
1249  Cost = 4;
1250  break;
1251  case MVT::v8i64:
1252  RRC = &ARM::DPRRegClass;
1253  Cost = 8;
1254  break;
1255  }
1256  return std::make_pair(RRC, Cost);
1257 }
1258 
1259 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1260  switch ((ARMISD::NodeType)Opcode) {
1261  case ARMISD::FIRST_NUMBER: break;
1262  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1263  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1264  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1265  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1266  case ARMISD::CALL: return "ARMISD::CALL";
1267  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1268  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1269  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1270  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1271  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1272  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1273  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1274  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1275  case ARMISD::CMP: return "ARMISD::CMP";
1276  case ARMISD::CMN: return "ARMISD::CMN";
1277  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1278  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1279  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1280  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1281  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1282 
1283  case ARMISD::CMOV: return "ARMISD::CMOV";
1284 
1285  case ARMISD::SSAT: return "ARMISD::SSAT";
1286  case ARMISD::USAT: return "ARMISD::USAT";
1287 
1288  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1289  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1290  case ARMISD::RRX: return "ARMISD::RRX";
1291 
1292  case ARMISD::ADDC: return "ARMISD::ADDC";
1293  case ARMISD::ADDE: return "ARMISD::ADDE";
1294  case ARMISD::SUBC: return "ARMISD::SUBC";
1295  case ARMISD::SUBE: return "ARMISD::SUBE";
1296 
1297  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1298  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1299  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1300  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1301  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1302 
1303  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1304  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1305  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1306 
1307  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1308 
1309  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1310 
1311  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1312 
1313  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1314 
1315  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1316 
1317  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1318  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1319 
1320  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1321  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1322  case ARMISD::VCGE: return "ARMISD::VCGE";
1323  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1324  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1325  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1326  case ARMISD::VCGT: return "ARMISD::VCGT";
1327  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1328  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1329  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1330  case ARMISD::VTST: return "ARMISD::VTST";
1331 
1332  case ARMISD::VSHL: return "ARMISD::VSHL";
1333  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1334  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1335  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1336  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1337  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1338  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1339  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1340  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1341  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1342  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1343  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1344  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1345  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1346  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1347  case ARMISD::VSLI: return "ARMISD::VSLI";
1348  case ARMISD::VSRI: return "ARMISD::VSRI";
1349  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1350  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1351  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1352  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1353  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1354  case ARMISD::VDUP: return "ARMISD::VDUP";
1355  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1356  case ARMISD::VEXT: return "ARMISD::VEXT";
1357  case ARMISD::VREV64: return "ARMISD::VREV64";
1358  case ARMISD::VREV32: return "ARMISD::VREV32";
1359  case ARMISD::VREV16: return "ARMISD::VREV16";
1360  case ARMISD::VZIP: return "ARMISD::VZIP";
1361  case ARMISD::VUZP: return "ARMISD::VUZP";
1362  case ARMISD::VTRN: return "ARMISD::VTRN";
1363  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1364  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1365  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1366  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1367  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1368  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1369  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1370  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1371  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1372  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1373  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1374  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1375  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1376  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1377  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1378  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1379  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1380  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1381  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1382  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1383  case ARMISD::BFI: return "ARMISD::BFI";
1384  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1385  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1386  case ARMISD::VBSL: return "ARMISD::VBSL";
1387  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1388  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1389  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1390  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1391  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1392  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1393  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1394  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1395  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1396  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1397  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1398  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1399  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1400  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1401  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1402  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1403  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1404  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1405  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1406  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1407  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1408  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1409  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1410  }
1411  return nullptr;
1412 }
1413 
1415  EVT VT) const {
1416  if (!VT.isVector())
1417  return getPointerTy(DL);
1419 }
1420 
1421 /// getRegClassFor - Return the register class that should be used for the
1422 /// specified value type.
1424  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1425  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1426  // load / store 4 to 8 consecutive D registers.
1427  if (Subtarget->hasNEON()) {
1428  if (VT == MVT::v4i64)
1429  return &ARM::QQPRRegClass;
1430  if (VT == MVT::v8i64)
1431  return &ARM::QQQQPRRegClass;
1432  }
1433  return TargetLowering::getRegClassFor(VT);
1434 }
1435 
1436 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1437 // source/dest is aligned and the copy size is large enough. We therefore want
1438 // to align such objects passed to memory intrinsics.
1440  unsigned &PrefAlign) const {
1441  if (!isa<MemIntrinsic>(CI))
1442  return false;
1443  MinSize = 8;
1444  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1445  // cycle faster than 4-byte aligned LDM.
1446  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1447  return true;
1448 }
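// Illustrative example (typical effect, not a guarantee): for
//   struct S { int a[8]; } dst, src;
//   memcpy(&dst, &src, sizeof(dst));
// raising the alignment of dst/src to 8 lets the memcpy expansion on an
// ARM11-class core use 8-byte aligned LDM/STM, which the comment above
// notes is about a cycle faster than the 4-byte aligned form.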
1449 
1450 // Create a fast isel object.
1451 FastISel *
1453  const TargetLibraryInfo *libInfo) const {
1454  return ARM::createFastISel(funcInfo, libInfo);
1455 }
1456 
1458  unsigned NumVals = N->getNumValues();
1459  if (!NumVals)
1460  return Sched::RegPressure;
1461 
1462  for (unsigned i = 0; i != NumVals; ++i) {
1463  EVT VT = N->getValueType(i);
1464  if (VT == MVT::Glue || VT == MVT::Other)
1465  continue;
1466  if (VT.isFloatingPoint() || VT.isVector())
1467  return Sched::ILP;
1468  }
1469 
1470  if (!N->isMachineOpcode())
1471  return Sched::RegPressure;
1472 
1473  // Loads are scheduled for latency even if the instruction itinerary
1474  // is not available.
1475  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1476  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1477 
1478  if (MCID.getNumDefs() == 0)
1479  return Sched::RegPressure;
1480  if (!Itins->isEmpty() &&
1481  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1482  return Sched::ILP;
1483 
1484  return Sched::RegPressure;
1485 }
1486 
1487 //===----------------------------------------------------------------------===//
1488 // Lowering Code
1489 //===----------------------------------------------------------------------===//
1490 
1491 static bool isSRL16(const SDValue &Op) {
1492  if (Op.getOpcode() != ISD::SRL)
1493  return false;
1494  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1495  return Const->getZExtValue() == 16;
1496  return false;
1497 }
1498 
1499 static bool isSRA16(const SDValue &Op) {
1500  if (Op.getOpcode() != ISD::SRA)
1501  return false;
1502  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1503  return Const->getZExtValue() == 16;
1504  return false;
1505 }
1506 
1507 static bool isSHL16(const SDValue &Op) {
1508  if (Op.getOpcode() != ISD::SHL)
1509  return false;
1510  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1511  return Const->getZExtValue() == 16;
1512  return false;
1513 }
1514 
1515 // Check for a signed 16-bit value. We special-case SRA because it makes
1516 // things simpler when also looking for SRAs that aren't sign-extending a
1517 // smaller value. Without the check, we'd need to take extra care with
1518 // checking order for some operations.
1519 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1520  if (isSRA16(Op))
1521  return isSHL16(Op.getOperand(0));
1522  return DAG.ComputeNumSignBits(Op) == 17;
1523 }
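// These predicates support matching of the 16-bit multiply patterns
// (SMULWB/SMULWT, SMLALBB, etc., listed in getTargetNodeName above):
// e.g. (sra (shl x, 16), 16) is accepted, as is a value for which
// ComputeNumSignBits reports exactly 17 sign bits, i.e. a sign-extended
// 16-bit quantity.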
1524 
1525 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1527  switch (CC) {
1528  default: llvm_unreachable("Unknown condition code!");
1529  case ISD::SETNE: return ARMCC::NE;
1530  case ISD::SETEQ: return ARMCC::EQ;
1531  case ISD::SETGT: return ARMCC::GT;
1532  case ISD::SETGE: return ARMCC::GE;
1533  case ISD::SETLT: return ARMCC::LT;
1534  case ISD::SETLE: return ARMCC::LE;
1535  case ISD::SETUGT: return ARMCC::HI;
1536  case ISD::SETUGE: return ARMCC::HS;
1537  case ISD::SETULT: return ARMCC::LO;
1538  case ISD::SETULE: return ARMCC::LS;
1539  }
1540 }
1541 
1542 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1544  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1545  CondCode2 = ARMCC::AL;
1546  InvalidOnQNaN = true;
1547  switch (CC) {
1548  default: llvm_unreachable("Unknown FP condition!");
1549  case ISD::SETEQ:
1550  case ISD::SETOEQ:
1551  CondCode = ARMCC::EQ;
1552  InvalidOnQNaN = false;
1553  break;
1554  case ISD::SETGT:
1555  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1556  case ISD::SETGE:
1557  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1558  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1559  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1560  case ISD::SETONE:
1561  CondCode = ARMCC::MI;
1562  CondCode2 = ARMCC::GT;
1563  InvalidOnQNaN = false;
1564  break;
1565  case ISD::SETO: CondCode = ARMCC::VC; break;
1566  case ISD::SETUO: CondCode = ARMCC::VS; break;
1567  case ISD::SETUEQ:
1568  CondCode = ARMCC::EQ;
1569  CondCode2 = ARMCC::VS;
1570  InvalidOnQNaN = false;
1571  break;
1572  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1573  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1574  case ISD::SETLT:
1575  case ISD::SETULT: CondCode = ARMCC::LT; break;
1576  case ISD::SETLE:
1577  case ISD::SETULE: CondCode = ARMCC::LE; break;
1578  case ISD::SETNE:
1579  case ISD::SETUNE:
1580  CondCode = ARMCC::NE;
1581  InvalidOnQNaN = false;
1582  break;
1583  }
1584 }
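// Example: SETONE ("ordered and not equal") has no single ARM condition
// code, so it is returned as MI with CondCode2 = GT; whenever CondCode2 is
// not ARMCC::AL the caller typically emits a second conditional instruction
// to cover the other half of the test.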
1585 
1586 //===----------------------------------------------------------------------===//
1587 // Calling Convention Implementation
1588 //===----------------------------------------------------------------------===//
1589 
1590 #include "ARMGenCallingConv.inc"
1591 
1592 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1593 /// account the presence of floating-point hardware and calling convention
1594 /// limitations, such as support for variadic functions.
1596 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1597  bool isVarArg) const {
1598  switch (CC) {
1599  default:
1600  report_fatal_error("Unsupported calling convention");
1602  case CallingConv::ARM_APCS:
1603  case CallingConv::GHC:
1604  return CC;
1608  case CallingConv::Swift:
1610  case CallingConv::C:
1611  if (!Subtarget->isAAPCS_ABI())
1612  return CallingConv::ARM_APCS;
1613  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1615  !isVarArg)
1617  else
1618  return CallingConv::ARM_AAPCS;
1619  case CallingConv::Fast:
1621  if (!Subtarget->isAAPCS_ABI()) {
1622  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1623  return CallingConv::Fast;
1624  return CallingConv::ARM_APCS;
1625  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1627  else
1628  return CallingConv::ARM_AAPCS;
1629  }
1630 }
1631 
1633  bool isVarArg) const {
1634  return CCAssignFnForNode(CC, false, isVarArg);
1635 }
1636 
1638  bool isVarArg) const {
1639  return CCAssignFnForNode(CC, true, isVarArg);
1640 }
1641 
1642 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1643 /// CallingConvention.
1644 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1645  bool Return,
1646  bool isVarArg) const {
1647  switch (getEffectiveCallingConv(CC, isVarArg)) {
1648  default:
1649  report_fatal_error("Unsupported calling convention");
1650  case CallingConv::ARM_APCS:
1651  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1653  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1655  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1656  case CallingConv::Fast:
1657  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1658  case CallingConv::GHC:
1659  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1661  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1662  }
1663 }
1664 
1665 /// LowerCallResult - Lower the result values of a call into the
1666 /// appropriate copies out of appropriate physical registers.
1667 SDValue ARMTargetLowering::LowerCallResult(
1668  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1669  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1670  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1671  SDValue ThisVal) const {
1672  // Assign locations to each value returned by this call.
1674  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1675  *DAG.getContext());
1676  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1677 
1678  // Copy all of the result registers out of their specified physreg.
1679  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1680  CCValAssign VA = RVLocs[i];
1681 
1682  // Pass the 'this' value directly from the argument to the return value,
1683  // to avoid register unit interference.
1684  if (i == 0 && isThisReturn) {
1685  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1686  "unexpected return calling convention register assignment");
1687  InVals.push_back(ThisVal);
1688  continue;
1689  }
1690 
1691  SDValue Val;
1692  if (VA.needsCustom()) {
1693  // Handle f64 or half of a v2f64.
1694  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1695  InFlag);
1696  Chain = Lo.getValue(1);
1697  InFlag = Lo.getValue(2);
1698  VA = RVLocs[++i]; // skip ahead to next loc
1699  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1700  InFlag);
1701  Chain = Hi.getValue(1);
1702  InFlag = Hi.getValue(2);
1703  if (!Subtarget->isLittle())
1704  std::swap (Lo, Hi);
1705  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1706 
1707  if (VA.getLocVT() == MVT::v2f64) {
1708  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1709  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1710  DAG.getConstant(0, dl, MVT::i32));
1711 
1712  VA = RVLocs[++i]; // skip ahead to next loc
1713  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1714  Chain = Lo.getValue(1);
1715  InFlag = Lo.getValue(2);
1716  VA = RVLocs[++i]; // skip ahead to next loc
1717  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1718  Chain = Hi.getValue(1);
1719  InFlag = Hi.getValue(2);
1720  if (!Subtarget->isLittle())
1721  std::swap (Lo, Hi);
1722  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1723  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1724  DAG.getConstant(1, dl, MVT::i32));
1725  }
1726  } else {
1727  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1728  InFlag);
1729  Chain = Val.getValue(1);
1730  InFlag = Val.getValue(2);
1731  }
1732 
1733  switch (VA.getLocInfo()) {
1734  default: llvm_unreachable("Unknown loc info!");
1735  case CCValAssign::Full: break;
1736  case CCValAssign::BCvt:
1737  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1738  break;
1739  }
1740 
1741  InVals.push_back(Val);
1742  }
1743 
1744  return Chain;
1745 }
1746 
1747 /// LowerMemOpCallTo - Store the argument to the stack.
1748 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1749  SDValue Arg, const SDLoc &dl,
1750  SelectionDAG &DAG,
1751  const CCValAssign &VA,
1752  ISD::ArgFlagsTy Flags) const {
1753  unsigned LocMemOffset = VA.getLocMemOffset();
1754  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1755  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1756  StackPtr, PtrOff);
1757  return DAG.getStore(
1758  Chain, dl, Arg, PtrOff,
1759  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1760 }
1761 
1762 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1763  SDValue Chain, SDValue &Arg,
1764  RegsToPassVector &RegsToPass,
1765  CCValAssign &VA, CCValAssign &NextVA,
1766  SDValue &StackPtr,
1767  SmallVectorImpl<SDValue> &MemOpChains,
1768  ISD::ArgFlagsTy Flags) const {
1769  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1770  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1771  unsigned id = Subtarget->isLittle() ? 0 : 1;
1772  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1773 
1774  if (NextVA.isRegLoc())
1775  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1776  else {
1777  assert(NextVA.isMemLoc());
1778  if (!StackPtr.getNode())
1779  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1780  getPointerTy(DAG.getDataLayout()));
1781 
1782  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1783  dl, DAG, NextVA,
1784  Flags));
1785  }
1786 }
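// Illustrative note: with the integer (soft-float) AAPCS variant an f64
// argument is passed in two core registers, so the value is split with
// VMOVRRD and the two i32 halves land either in consecutive GPRs (e.g.
// r0/r1) or, when only one register is left, partly in a register and
// partly on the stack, which is the NextVA.isMemLoc() case handled above.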
1787 
1788 /// LowerCall - Lowering a call into a callseq_start <-
1789 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1790 /// nodes.
1791 SDValue
1792 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1793  SmallVectorImpl<SDValue> &InVals) const {
1794  SelectionDAG &DAG = CLI.DAG;
1795  SDLoc &dl = CLI.DL;
1797  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1799  SDValue Chain = CLI.Chain;
1800  SDValue Callee = CLI.Callee;
1801  bool &isTailCall = CLI.IsTailCall;
1802  CallingConv::ID CallConv = CLI.CallConv;
1803  bool doesNotRet = CLI.DoesNotReturn;
1804  bool isVarArg = CLI.IsVarArg;
1805 
1806  MachineFunction &MF = DAG.getMachineFunction();
1807  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1808  bool isThisReturn = false;
1809  bool isSibCall = false;
1810  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
1811 
1812  // Disable tail calls if they're not supported.
1813  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1814  isTailCall = false;
1815 
1816  if (isTailCall) {
1817  // Check if it's really possible to do a tail call.
1818  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1819  isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
1820  Outs, OutVals, Ins, DAG);
1821  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1822  report_fatal_error("failed to perform tail call elimination on a call "
1823  "site marked musttail");
1824  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1825  // detected sibcalls.
1826  if (isTailCall) {
1827  ++NumTailCalls;
1828  isSibCall = true;
1829  }
1830  }
1831 
1832  // Analyze operands of the call, assigning locations to each operand.
1834  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1835  *DAG.getContext());
1836  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1837 
1838  // Get a count of how many bytes are to be pushed on the stack.
1839  unsigned NumBytes = CCInfo.getNextStackOffset();
1840 
1841  // For tail calls, memory operands are available in our caller's stack.
1842  if (isSibCall)
1843  NumBytes = 0;
1844 
1845  // Adjust the stack pointer for the new arguments...
1846  // These operations are automatically eliminated by the prolog/epilog pass
1847  if (!isSibCall)
1848  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1849 
1850  SDValue StackPtr =
1851  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1852 
1853  RegsToPassVector RegsToPass;
1854  SmallVector<SDValue, 8> MemOpChains;
1855 
1856  // Walk the register/memloc assignments, inserting copies/loads. In the case
1857  // of tail call optimization, arguments are handled later.
1858  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1859  i != e;
1860  ++i, ++realArgIdx) {
1861  CCValAssign &VA = ArgLocs[i];
1862  SDValue Arg = OutVals[realArgIdx];
1863  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1864  bool isByVal = Flags.isByVal();
1865 
1866  // Promote the value if needed.
1867  switch (VA.getLocInfo()) {
1868  default: llvm_unreachable("Unknown loc info!");
1869  case CCValAssign::Full: break;
1870  case CCValAssign::SExt:
1871  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1872  break;
1873  case CCValAssign::ZExt:
1874  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1875  break;
1876  case CCValAssign::AExt:
1877  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1878  break;
1879  case CCValAssign::BCvt:
1880  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1881  break;
1882  }
1883 
1884  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1885  if (VA.needsCustom()) {
1886  if (VA.getLocVT() == MVT::v2f64) {
1887  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1888  DAG.getConstant(0, dl, MVT::i32));
1889  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1890  DAG.getConstant(1, dl, MVT::i32));
1891 
1892  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1893  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1894 
1895  VA = ArgLocs[++i]; // skip ahead to next loc
1896  if (VA.isRegLoc()) {
1897  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1898  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1899  } else {
1900  assert(VA.isMemLoc());
1901 
1902  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1903  dl, DAG, VA, Flags));
1904  }
1905  } else {
1906  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1907  StackPtr, MemOpChains, Flags);
1908  }
1909  } else if (VA.isRegLoc()) {
1910  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1911  Outs[0].VT == MVT::i32) {
1912  assert(VA.getLocVT() == MVT::i32 &&
1913  "unexpected calling convention register assignment");
1914  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1915  "unexpected use of 'returned'");
1916  isThisReturn = true;
1917  }
1918  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1919  } else if (isByVal) {
1920  assert(VA.isMemLoc());
1921  unsigned offset = 0;
1922 
1923  // True if this byval aggregate will be split between registers
1924  // and memory.
1925  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1926  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1927 
1928  if (CurByValIdx < ByValArgsCount) {
1929 
1930  unsigned RegBegin, RegEnd;
1931  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1932 
1933  EVT PtrVT =
1934  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1935  unsigned int i, j;
1936  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1937  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1938  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1939  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1940  MachinePointerInfo(),
1941  DAG.InferPtrAlignment(AddArg));
1942  MemOpChains.push_back(Load.getValue(1));
1943  RegsToPass.push_back(std::make_pair(j, Load));
1944  }
1945 
1946  // If the parameter size extends beyond the register area, the "offset"
1947  // value lets us compute the stack slot for the remaining part correctly.
1948  offset = RegEnd - RegBegin;
1949 
1950  CCInfo.nextInRegsParam();
1951  }
1952 
1953  if (Flags.getByValSize() > 4*offset) {
1954  auto PtrVT = getPointerTy(DAG.getDataLayout());
1955  unsigned LocMemOffset = VA.getLocMemOffset();
1956  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1957  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1958  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1959  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1960  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1961  MVT::i32);
1962  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1963  MVT::i32);
1964 
1965  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1966  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1967  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1968  Ops));
1969  }
1970  } else if (!isSibCall) {
1971  assert(VA.isMemLoc());
1972 
1973  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1974  dl, DAG, VA, Flags));
1975  }
1976  }
1977 
1978  if (!MemOpChains.empty())
1979  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1980 
1981  // Build a sequence of copy-to-reg nodes chained together with token chain
1982  // and flag operands which copy the outgoing args into the appropriate regs.
1983  SDValue InFlag;
1984  // Tail call byval lowering might overwrite argument registers so in case of
1985  // tail call optimization the copies to registers are lowered later.
1986  if (!isTailCall)
1987  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1988  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1989  RegsToPass[i].second, InFlag);
1990  InFlag = Chain.getValue(1);
1991  }
1992 
1993  // For tail calls lower the arguments to the 'real' stack slot.
1994  if (isTailCall) {
1995  // Force all the incoming stack arguments to be loaded from the stack
1996  // before any new outgoing arguments are stored to the stack, because the
1997  // outgoing stack slots may alias the incoming argument stack slots, and
1998  // the alias isn't otherwise explicit. This is slightly more conservative
1999  // than necessary, because it means that each store effectively depends
2000  // on every argument instead of just those arguments it would clobber.
2001 
2002  // Do not flag preceding copytoreg stuff together with the following stuff.
2003  InFlag = SDValue();
2004  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2005  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2006  RegsToPass[i].second, InFlag);
2007  InFlag = Chain.getValue(1);
2008  }
2009  InFlag = SDValue();
2010  }
2011 
2012  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2013  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2014  // node so that legalize doesn't hack it.
2015  bool isDirect = false;
2016 
2017  const TargetMachine &TM = getTargetMachine();
2018  const Module *Mod = MF.getFunction().getParent();
2019  const GlobalValue *GV = nullptr;
2020  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2021  GV = G->getGlobal();
2022  bool isStub =
2023  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2024 
2025  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2026  bool isLocalARMFunc = false;
2027  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2028  auto PtrVt = getPointerTy(DAG.getDataLayout());
2029 
2030  if (Subtarget->genLongCalls()) {
2031  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2032  "long-calls codegen is not position independent!");
2033  // Handle a global address or an external symbol. If it's not one of
2034  // those, the target's already in a register, so we don't need to do
2035  // anything extra.
2036  if (isa<GlobalAddressSDNode>(Callee)) {
2037  // Create a constant pool entry for the callee address
2038  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2039  ARMConstantPoolValue *CPV =
2040  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2041 
2042  // Get the address of the callee into a register
2043  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2044  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2045  Callee = DAG.getLoad(
2046  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2047  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2048  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2049  const char *Sym = S->getSymbol();
2050 
2051  // Create a constant pool entry for the callee address
2052  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2053  ARMConstantPoolValue *CPV =
2054  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2055  ARMPCLabelIndex, 0);
2056  // Get the address of the callee into a register
2057  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2058  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2059  Callee = DAG.getLoad(
2060  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2061  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2062  }
2063  } else if (isa<GlobalAddressSDNode>(Callee)) {
2064  // If we're optimizing for minimum size and the function is called three or
2065  // more times in this block, we can improve codesize by calling indirectly
2066  // as BLXr has a 16-bit encoding.
2067  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2068  auto *BB = CLI.CS.getParent();
2069  bool PreferIndirect =
2070  Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
2071  count_if(GV->users(), [&BB](const User *U) {
2072  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2073  }) > 2;
2074 
2075  if (!PreferIndirect) {
2076  isDirect = true;
2077  bool isDef = GV->isStrongDefinitionForLinker();
2078 
2079  // ARM call to a local ARM function is predicable.
2080  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2081  // tBX takes a register source operand.
2082  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2083  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2084  Callee = DAG.getNode(
2085  ARMISD::WrapperPIC, dl, PtrVt,
2086  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2087  Callee = DAG.getLoad(
2088  PtrVt, dl, DAG.getEntryNode(), Callee,
2089  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2090  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2091  MachineMemOperand::MOInvariant);
2092  } else if (Subtarget->isTargetCOFF()) {
2093  assert(Subtarget->isTargetWindows() &&
2094  "Windows is the only supported COFF target");
2095  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2096  ? ARMII::MO_DLLIMPORT
2097  : ARMII::MO_NO_FLAG;
2098  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2099  TargetFlags);
2100  if (GV->hasDLLImportStorageClass())
2101  Callee =
2102  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2103  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2104  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2105  } else {
2106  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2107  }
2108  }
2109  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2110  isDirect = true;
2111  // tBX takes a register source operand.
2112  const char *Sym = S->getSymbol();
2113  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2114  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2115  ARMConstantPoolValue *CPV =
2116  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2117  ARMPCLabelIndex, 4);
2118  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2119  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2120  Callee = DAG.getLoad(
2121  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2122  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2123  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2124  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2125  } else {
2126  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2127  }
2128  }
2129 
2130  // FIXME: handle tail calls differently.
2131  unsigned CallOpc;
2132  if (Subtarget->isThumb()) {
2133  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2134  CallOpc = ARMISD::CALL_NOLINK;
2135  else
2136  CallOpc = ARMISD::CALL;
2137  } else {
2138  if (!isDirect && !Subtarget->hasV5TOps())
2139  CallOpc = ARMISD::CALL_NOLINK;
2140  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2141  // Emit regular call when code size is the priority
2142  !MF.getFunction().optForMinSize())
2143  // "mov lr, pc; b _foo" to avoid confusing the return-address stack predictor (RSP)
2144  CallOpc = ARMISD::CALL_NOLINK;
2145  else
2146  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2147  }
2148 
2149  std::vector<SDValue> Ops;
2150  Ops.push_back(Chain);
2151  Ops.push_back(Callee);
2152 
2153  // Add argument registers to the end of the list so that they are known live
2154  // into the call.
2155  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2156  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2157  RegsToPass[i].second.getValueType()));
2158 
2159  // Add a register mask operand representing the call-preserved registers.
2160  if (!isTailCall) {
2161  const uint32_t *Mask;
2162  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2163  if (isThisReturn) {
2164  // For 'this' returns, use the R0-preserving mask if applicable
2165  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2166  if (!Mask) {
2167  // Set isThisReturn to false if the calling convention is not one that
2168  // allows 'returned' to be modeled in this way, so LowerCallResult does
2169  // not try to pass 'this' straight through
2170  isThisReturn = false;
2171  Mask = ARI->getCallPreservedMask(MF, CallConv);
2172  }
2173  } else
2174  Mask = ARI->getCallPreservedMask(MF, CallConv);
2175 
2176  assert(Mask && "Missing call preserved mask for calling convention");
2177  Ops.push_back(DAG.getRegisterMask(Mask));
2178  }
2179 
2180  if (InFlag.getNode())
2181  Ops.push_back(InFlag);
2182 
2183  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2184  if (isTailCall) {
2185  MF.getFrameInfo().setHasTailCall();
2186  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2187  }
2188 
2189  // Returns a chain and a flag for retval copy to use.
2190  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2191  InFlag = Chain.getValue(1);
2192 
2193  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2194  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2195  if (!Ins.empty())
2196  InFlag = Chain.getValue(1);
2197 
2198  // Handle result values, copying them out of physregs into vregs that we
2199  // return.
2200  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2201  InVals, isThisReturn,
2202  isThisReturn ? OutVals[0] : SDValue());
2203 }
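// For illustration, a plain direct call such as "call void @foo(i32 1)" (with a
// hypothetical callee @foo) is lowered above into roughly this node sequence:
//
//   callseq_start(Chain, NumBytes)
//   CopyToReg R0, 1                        (argument copies, glued together)
//   ARMISD::CALL TargetGlobalAddress:@foo  (or CALL_PRED / CALL_NOLINK)
//   callseq_end(Chain, NumBytes)
//
// with the results then copied back out of physical registers by
// LowerCallResult.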
2204 
2205 /// HandleByVal - Every parameter *after* a byval parameter is passed
2206 /// on the stack. Remember the next parameter register to allocate,
2207 /// and then confiscate the rest of the parameter registers to ensure
2208 /// this.
2209 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2210  unsigned Align) const {
2211  // Byval (as with any stack) slots are always at least 4 byte aligned.
2212  Align = std::max(Align, 4U);
2213 
2214  unsigned Reg = State->AllocateReg(GPRArgRegs);
2215  if (!Reg)
2216  return;
2217 
2218  unsigned AlignInRegs = Align / 4;
2219  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2220  for (unsigned i = 0; i < Waste; ++i)
2221  Reg = State->AllocateReg(GPRArgRegs);
2222 
2223  if (!Reg)
2224  return;
2225 
2226  unsigned Excess = 4 * (ARM::R4 - Reg);
2227 
2228  // Special case when NSAA != SP and the parameter size is greater than the
2229  // size of all remaining GPR regs. In that case we can't split the parameter;
2230  // we must send it to the stack. We must also set the NCRN to R4, wasting all
2231  // remaining registers.
2232  const unsigned NSAAOffset = State->getNextStackOffset();
2233  if (NSAAOffset != 0 && Size > Excess) {
2234  while (State->AllocateReg(GPRArgRegs))
2235  ;
2236  return;
2237  }
2238 
2239  // The first register for the byval parameter is the first register that
2240  // wasn't allocated before this call, so it is "Reg".
2241  // If the parameter is small enough to fit in the range [Reg, R4), then the
2242  // end (one past the last) register is Reg + param-size-in-regs; otherwise
2243  // the parameter is split between registers and the stack, and the end
2244  // register is R4.
2245  unsigned ByValRegBegin = Reg;
2246  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2247  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2248  // Note, the first register was already allocated at the beginning of this
2249  // function; allocate the remaining registers we need.
2250  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2251  State->AllocateReg(GPRArgRegs);
2252  // A byval parameter that is split between registers and memory needs its
2253  // size truncated here.
2254  // In the case where the entire structure fits in registers, we set the
2255  // size in memory to zero.
2256  Size = std::max<int>(Size - Excess, 0);
2257 }
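// Worked example (illustrative values only): with r0 and r1 already holding
// earlier arguments and nothing yet on the stack (NSAA == SP), a 24-byte byval
// with 4-byte alignment gets Reg = R2, so Excess = 4 * (R4 - R2) = 8 bytes are
// passed in registers (r2, r3) and Size is reduced to 24 - 8 = 16 bytes that
// stay in memory.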
2258 
2259 /// MatchingStackOffset - Return true if the given stack call argument is
2260 /// already available in the same position (relatively) of the caller's
2261 /// incoming argument stack.
2262 static
2263 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2264  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2265  const TargetInstrInfo *TII) {
2266  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2267  int FI = std::numeric_limits<int>::max();
2268  if (Arg.getOpcode() == ISD::CopyFromReg) {
2269  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2270  if (!TargetRegisterInfo::isVirtualRegister(VR))
2271  return false;
2272  MachineInstr *Def = MRI->getVRegDef(VR);
2273  if (!Def)
2274  return false;
2275  if (!Flags.isByVal()) {
2276  if (!TII->isLoadFromStackSlot(*Def, FI))
2277  return false;
2278  } else {
2279  return false;
2280  }
2281  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2282  if (Flags.isByVal())
2283  // ByVal argument is passed in as a pointer but it's now being
2284  // dereferenced. e.g.
2285  // define @foo(%struct.X* %A) {
2286  // tail call @bar(%struct.X* byval %A)
2287  // }
2288  return false;
2289  SDValue Ptr = Ld->getBasePtr();
2290  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2291  if (!FINode)
2292  return false;
2293  FI = FINode->getIndex();
2294  } else
2295  return false;
2296 
2297  assert(FI != std::numeric_limits<int>::max());
2298  if (!MFI.isFixedObjectIndex(FI))
2299  return false;
2300  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2301 }
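// Illustrative case: if a caller simply forwards one of its own incoming stack
// arguments to a sibcall, the value is a load from a fixed frame index whose
// offset and size match the outgoing slot, so MatchingStackOffset returns true
// and no store is needed for that operand.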
2302 
2303 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2304 /// for tail call optimization. Targets which want to do tail call
2305 /// optimization should implement this function.
2306 bool
2307 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2308  CallingConv::ID CalleeCC,
2309  bool isVarArg,
2310  bool isCalleeStructRet,
2311  bool isCallerStructRet,
2312  const SmallVectorImpl<ISD::OutputArg> &Outs,
2313  const SmallVectorImpl<SDValue> &OutVals,
2314  const SmallVectorImpl<ISD::InputArg> &Ins,
2315  SelectionDAG& DAG) const {
2316  MachineFunction &MF = DAG.getMachineFunction();
2317  const Function &CallerF = MF.getFunction();
2318  CallingConv::ID CallerCC = CallerF.getCallingConv();
2319 
2320  assert(Subtarget->supportsTailCall());
2321 
2322  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
2323  // to the call take up r0-r3. The reason is that there are no legal registers
2324  // left to hold the pointer to the function to be called.
2325  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2326  !isa<GlobalAddressSDNode>(Callee.getNode()))
2327  return false;
2328 
2329  // Look for obvious safe cases to perform tail call optimization that do not
2330  // require ABI changes. This is what gcc calls sibcall.
2331 
2332  // Exception-handling functions need a special set of instructions to indicate
2333  // a return to the hardware. Tail-calling another function would probably
2334  // break this.
2335  if (CallerF.hasFnAttribute("interrupt"))
2336  return false;
2337 
2338  // Also avoid sibcall optimization if either caller or callee uses struct
2339  // return semantics.
2340  if (isCalleeStructRet || isCallerStructRet)
2341  return false;
2342 
2343  // Externally-defined functions with weak linkage should not be
2344  // tail-called on ARM when the OS does not support dynamic
2345  // pre-emption of symbols, as the AAELF spec requires normal calls
2346  // to undefined weak functions to be replaced with a NOP or jump to the
2347  // next instruction. The behaviour of branch instructions in this
2348  // situation (as used for tail calls) is implementation-defined, so we
2349  // cannot rely on the linker replacing the tail call with a return.
2350  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2351  const GlobalValue *GV = G->getGlobal();
2352  const Triple &TT = getTargetMachine().getTargetTriple();
2353  if (GV->hasExternalWeakLinkage() &&
2354  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2355  return false;
2356  }
2357 
2358  // Check that the call results are passed in the same way.
2359  LLVMContext &C = *DAG.getContext();
2360  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2361  CCAssignFnForReturn(CalleeCC, isVarArg),
2362  CCAssignFnForReturn(CallerCC, isVarArg)))
2363  return false;
2364  // The callee has to preserve all registers the caller needs to preserve.
2365  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2366  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2367  if (CalleeCC != CallerCC) {
2368  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2369  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2370  return false;
2371  }
2372 
2373  // If Caller's vararg or byval argument has been split between registers and
2374  // stack, do not perform tail call, since part of the argument is in caller's
2375  // local frame.
2376  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2377  if (AFI_Caller->getArgRegsSaveSize())
2378  return false;
2379 
2380  // If the callee takes no arguments then go on to check the results of the
2381  // call.
2382  if (!Outs.empty()) {
2383  // Check if stack adjustment is needed. For now, do not do this if any
2384  // argument is passed on the stack.
2385  SmallVector<CCValAssign, 16> ArgLocs;
2386  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2387  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2388  if (CCInfo.getNextStackOffset()) {
2389  // Check if the arguments are already laid out in the right way as
2390  // the caller's fixed stack objects.
2391  MachineFrameInfo &MFI = MF.getFrameInfo();
2392  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2393  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2394  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2395  i != e;
2396  ++i, ++realArgIdx) {
2397  CCValAssign &VA = ArgLocs[i];
2398  EVT RegVT = VA.getLocVT();
2399  SDValue Arg = OutVals[realArgIdx];
2400  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2401  if (VA.getLocInfo() == CCValAssign::Indirect)
2402  return false;
2403  if (VA.needsCustom()) {
2404  // f64 and vector types are split into multiple registers or
2405  // register/stack-slot combinations. The types will not match
2406  // the registers; give up on memory f64 refs until we figure
2407  // out what to do about this.
2408  if (!VA.isRegLoc())
2409  return false;
2410  if (!ArgLocs[++i].isRegLoc())
2411  return false;
2412  if (RegVT == MVT::v2f64) {
2413  if (!ArgLocs[++i].isRegLoc())
2414  return false;
2415  if (!ArgLocs[++i].isRegLoc())
2416  return false;
2417  }
2418  } else if (!VA.isRegLoc()) {
2419  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2420  MFI, MRI, TII))
2421  return false;
2422  }
2423  }
2424  }
2425 
2426  const MachineRegisterInfo &MRI = MF.getRegInfo();
2427  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2428  return false;
2429  }
2430 
2431  return true;
2432 }
2433 
2434 bool
2435 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2436  MachineFunction &MF, bool isVarArg,
2437  const SmallVectorImpl<ISD::OutputArg> &Outs,
2438  LLVMContext &Context) const {
2439  SmallVector<CCValAssign, 16> RVLocs;
2440  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2441  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2442 }
2443 
2444 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2445  const SDLoc &DL, SelectionDAG &DAG) {
2446  const MachineFunction &MF = DAG.getMachineFunction();
2447  const Function &F = MF.getFunction();
2448 
2449  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2450 
2451  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2452  // version of the "preferred return address". These offsets affect the return
2453  // instruction if this is a return from PL1 without hypervisor extensions.
2454  // IRQ/FIQ: +4 "subs pc, lr, #4"
2455  // SWI: 0 "subs pc, lr, #0"
2456  // ABORT: +4 "subs pc, lr, #4"
2457  // UNDEF: +4/+2 "subs pc, lr, #0"
2458  // UNDEF varies depending on where the exception came from ARM or Thumb
2459  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2460 
2461  int64_t LROffset;
2462  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2463  IntKind == "ABORT")
2464  LROffset = 4;
2465  else if (IntKind == "SWI" || IntKind == "UNDEF")
2466  LROffset = 0;
2467  else
2468  report_fatal_error("Unsupported interrupt attribute. If present, value "
2469  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2470 
2471  RetOps.insert(RetOps.begin() + 1,
2472  DAG.getConstant(LROffset, DL, MVT::i32, false));
2473 
2474  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2475 }
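// Illustrative source (hypothetical handler name): a function declared as
//   __attribute__((interrupt("IRQ"))) void isr(void) { ... }
// carries the "interrupt"="IRQ" attribute, so LROffset is 4 and the function
// returns with "subs pc, lr, #4" rather than a plain "bx lr".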
2476 
2477 SDValue
2478 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2479  bool isVarArg,
2480  const SmallVectorImpl<ISD::OutputArg> &Outs,
2481  const SmallVectorImpl<SDValue> &OutVals,
2482  const SDLoc &dl, SelectionDAG &DAG) const {
2483  // CCValAssign - represent the assignment of the return value to a location.
2484  SmallVector<CCValAssign, 16> RVLocs;
2485 
2486  // CCState - Info about the registers and stack slots.
2487  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2488  *DAG.getContext());
2489 
2490  // Analyze outgoing return values.
2491  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2492 
2493  SDValue Flag;
2494  SmallVector<SDValue, 4> RetOps;
2495  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2496  bool isLittleEndian = Subtarget->isLittle();
2497 
2498  MachineFunction &MF = DAG.getMachineFunction();
2499  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2500  AFI->setReturnRegsCount(RVLocs.size());
2501 
2502  // Copy the result values into the output registers.
2503  for (unsigned i = 0, realRVLocIdx = 0;
2504  i != RVLocs.size();
2505  ++i, ++realRVLocIdx) {
2506  CCValAssign &VA = RVLocs[i];
2507  assert(VA.isRegLoc() && "Can only return in registers!");
2508 
2509  SDValue Arg = OutVals[realRVLocIdx];
2510  bool ReturnF16 = false;
2511 
2512  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2513  // Half-precision return values can be returned like this:
2514  //
2515  // t11: f16 = fadd ...
2516  // t12: i16 = bitcast t11
2517  // t13: i32 = zero_extend t12
2518  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2519  //
2520  // to avoid code generation for bitcasts, we simply set Arg to the node
2521  // that produces the f16 value, t11 in this case.
2522  //
2523  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2524  SDValue ZE = Arg.getOperand(0);
2525  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2526  SDValue BC = ZE.getOperand(0);
2527  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2528  Arg = BC.getOperand(0);
2529  ReturnF16 = true;
2530  }
2531  }
2532  }
2533  }
2534 
2535  switch (VA.getLocInfo()) {
2536  default: llvm_unreachable("Unknown loc info!");
2537  case CCValAssign::Full: break;
2538  case CCValAssign::BCvt:
2539  if (!ReturnF16)
2540  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2541  break;
2542  }
2543 
2544  if (VA.needsCustom()) {
2545  if (VA.getLocVT() == MVT::v2f64) {
2546  // Extract the first half and return it in two registers.
2547  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2548  DAG.getConstant(0, dl, MVT::i32));
2549  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2550  DAG.getVTList(MVT::i32, MVT::i32), Half);
2551 
2552  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2553  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2554  Flag);
2555  Flag = Chain.getValue(1);
2556  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2557  VA = RVLocs[++i]; // skip ahead to next loc
2558  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2559  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2560  Flag);
2561  Flag = Chain.getValue(1);
2562  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2563  VA = RVLocs[++i]; // skip ahead to next loc
2564 
2565  // Extract the 2nd half and fall through to handle it as an f64 value.
2566  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2567  DAG.getConstant(1, dl, MVT::i32));
2568  }
2569  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2570  // available.
2571  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2572  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2573  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2574  fmrrd.getValue(isLittleEndian ? 0 : 1),
2575  Flag);
2576  Flag = Chain.getValue(1);
2577  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2578  VA = RVLocs[++i]; // skip ahead to next loc
2579  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2580  fmrrd.getValue(isLittleEndian ? 1 : 0),
2581  Flag);
2582  } else
2583  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2584 
2585  // Guarantee that all emitted copies are glued together, so the scheduler
2586  // cannot reorder or separate them.
2587  Flag = Chain.getValue(1);
2588  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2589  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2590  }
2591  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2592  const MCPhysReg *I =
2593  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2594  if (I) {
2595  for (; *I; ++I) {
2596  if (ARM::GPRRegClass.contains(*I))
2597  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2598  else if (ARM::DPRRegClass.contains(*I))
2599  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2600  else
2601  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2602  }
2603  }
2604 
2605  // Update chain and glue.
2606  RetOps[0] = Chain;
2607  if (Flag.getNode())
2608  RetOps.push_back(Flag);
2609 
2610  // CPUs which aren't M-class use a special sequence to return from
2611  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2612  // though we use "subs pc, lr, #N").
2613  //
2614  // M-class CPUs actually use a normal return sequence with a special
2615  // (hardware-provided) value in LR, so the normal code path works.
2616  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2617  !Subtarget->isMClass()) {
2618  if (Subtarget->isThumb1Only())
2619  report_fatal_error("interrupt attribute is not supported in Thumb1");
2620  return LowerInterruptReturn(RetOps, dl, DAG);
2621  }
2622 
2623  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2624 }
2625 
2626 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2627  if (N->getNumValues() != 1)
2628  return false;
2629  if (!N->hasNUsesOfValue(1, 0))
2630  return false;
2631 
2632  SDValue TCChain = Chain;
2633  SDNode *Copy = *N->use_begin();
2634  if (Copy->getOpcode() == ISD::CopyToReg) {
2635  // If the copy has a glue operand, we conservatively assume it isn't safe to
2636  // perform a tail call.
2637  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2638  return false;
2639  TCChain = Copy->getOperand(0);
2640  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2641  SDNode *VMov = Copy;
2642  // f64 returned in a pair of GPRs.
2643  SmallPtrSet<SDNode*, 2> Copies;
2644  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2645  UI != UE; ++UI) {
2646  if (UI->getOpcode() != ISD::CopyToReg)
2647  return false;
2648  Copies.insert(*UI);
2649  }
2650  if (Copies.size() > 2)
2651  return false;
2652 
2653  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2654  UI != UE; ++UI) {
2655  SDValue UseChain = UI->getOperand(0);
2656  if (Copies.count(UseChain.getNode()))
2657  // Second CopyToReg
2658  Copy = *UI;
2659  else {
2660  // We are at the top of this chain.
2661  // If the copy has a glue operand, we conservatively assume it
2662  // isn't safe to perform a tail call.
2663  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2664  return false;
2665  // First CopyToReg
2666  TCChain = UseChain;
2667  }
2668  }
2669  } else if (Copy->getOpcode() == ISD::BITCAST) {
2670  // f32 returned in a single GPR.
2671  if (!Copy->hasOneUse())
2672  return false;
2673  Copy = *Copy->use_begin();
2674  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2675  return false;
2676  // If the copy has a glue operand, we conservatively assume it isn't safe to
2677  // perform a tail call.
2678  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2679  return false;
2680  TCChain = Copy->getOperand(0);
2681  } else {
2682  return false;
2683  }
2684 
2685  bool HasRet = false;
2686  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2687  UI != UE; ++UI) {
2688  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2689  UI->getOpcode() != ARMISD::INTRET_FLAG)
2690  return false;
2691  HasRet = true;
2692  }
2693 
2694  if (!HasRet)
2695  return false;
2696 
2697  Chain = TCChain;
2698  return true;
2699 }
2700 
2701 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2702  if (!Subtarget->supportsTailCall())
2703  return false;
2704 
2705  auto Attr =
2706  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2707  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2708  return false;
2709 
2710  return true;
2711 }
2712 
2713 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2714 // values first and pass the low and high parts through.
2715 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2716  SDLoc DL(Op);
2717  SDValue WriteValue = Op->getOperand(2);
2718 
2719  // This function is only supposed to be called for i64 type argument.
2720  assert(WriteValue.getValueType() == MVT::i64
2721  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2722 
2723  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2724  DAG.getConstant(0, DL, MVT::i32));
2725  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2726  DAG.getConstant(1, DL, MVT::i32));
2727  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2728  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2729 }
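// Illustrative IR (with a hypothetical register name bound to metadata !0):
//   call void @llvm.write_register.i64(metadata !0, i64 %v)
// is split here so that the resulting WRITE_REGISTER node receives the low and
// high i32 halves of %v as separate operands.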
2730 
2731 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2732 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2733 // one of the above mentioned nodes. It has to be wrapped because otherwise
2734 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2735 // be used to form addressing mode. These wrapped nodes will be selected
2736 // into MOVi.
2737 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2738  SelectionDAG &DAG) const {
2739  EVT PtrVT = Op.getValueType();
2740  // FIXME there is no actual debug info here
2741  SDLoc dl(Op);
2742  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2743  SDValue Res;
2744 
2745  // When generating execute-only code, constant pools must be promoted to the
2746  // global data section. It's a bit ugly that we can't share them across basic
2747  // blocks, but this way we guarantee that execute-only behaves correctly with
2748  // position-independent addressing modes.
2749  if (Subtarget->genExecuteOnly()) {
2750  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2751  auto T = const_cast<Type*>(CP->getType());
2752  auto C = const_cast<Constant*>(CP->getConstVal());
2753  auto M = const_cast<Module*>(DAG.getMachineFunction().
2754  getFunction().getParent());
2755  auto GV = new GlobalVariable(
2756  *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2757  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2758  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2759  Twine(AFI->createPICLabelUId())
2760  );
2761  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2762  dl, PtrVT);
2763  return LowerGlobalAddress(GA, DAG);
2764  }
2765 
2766  if (CP->isMachineConstantPoolEntry())
2767  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2768  CP->getAlignment());
2769  else
2770  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2771  CP->getAlignment());
2772  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2773 }
2774 
2775 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2776  return MachineJumpTableInfo::EK_Inline;
2777 }
2778 
2779 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2780  SelectionDAG &DAG) const {
2781  MachineFunction &MF = DAG.getMachineFunction();
2782  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2783  unsigned ARMPCLabelIndex = 0;
2784  SDLoc DL(Op);
2785  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2786  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2787  SDValue CPAddr;
2788  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2789  if (!IsPositionIndependent) {
2790  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2791  } else {
2792  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2793  ARMPCLabelIndex = AFI->createPICLabelUId();
2794  ARMConstantPoolValue *CPV =
2795  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2796  ARMCP::CPBlockAddress, PCAdj);
2797  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2798  }
2799  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2800  SDValue Result = DAG.getLoad(
2801  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2802  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2803  if (!IsPositionIndependent)
2804  return Result;
2805  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2806  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2807 }
2808 
2809 /// Convert a TLS address reference into the correct sequence of loads
2810 /// and calls to compute the variable's address for Darwin, and return an
2811 /// SDValue containing the final node.
2812 
2813 /// Darwin only has one TLS scheme which must be capable of dealing with the
2814 /// fully general situation, in the worst case. This means:
2815 /// + "extern __thread" declaration.
2816 /// + Defined in a possibly unknown dynamic library.
2817 ///
2818 /// The general system is that each __thread variable has a [3 x i32] descriptor
2819 /// which contains information used by the runtime to calculate the address. The
2820 /// only part of this the compiler needs to know about is the first word, which
2821 /// contains a function pointer that must be called with the address of the
2822 /// entire descriptor in "r0".
2823 ///
2824 /// Since this descriptor may be in a different unit, in general access must
2825 /// proceed along the usual ARM rules. A common sequence to produce is:
2826 ///
2827 /// movw rT1, :lower16:_var$non_lazy_ptr
2828 /// movt rT1, :upper16:_var$non_lazy_ptr
2829 /// ldr r0, [rT1]
2830 /// ldr rT2, [r0]
2831 /// blx rT2
2832 /// [...address now in r0...]
2833 SDValue
2834 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2835  SelectionDAG &DAG) const {
2836  assert(Subtarget->isTargetDarwin() &&
2837  "This function expects a Darwin target");
2838  SDLoc DL(Op);
2839 
2840  // The first step is to get the address of the actual global symbol. This is where
2841  // the TLS descriptor lives.
2842  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2843 
2844  // The first entry in the descriptor is a function pointer that we must call
2845  // to obtain the address of the variable.
2846  SDValue Chain = DAG.getEntryNode();
2847  SDValue FuncTLVGet = DAG.getLoad(
2848  MVT::i32, DL, Chain, DescAddr,
2849  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2850  /* Alignment = */ 4,
2851  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2852  MachineMemOperand::MOInvariant);
2853  Chain = FuncTLVGet.getValue(1);
2854 
2855  MachineFunction &F = DAG.getMachineFunction();
2856  MachineFrameInfo &MFI = F.getFrameInfo();
2857  MFI.setAdjustsStack(true);
2858 
2859  // TLS calls preserve all registers except those that absolutely must be
2860  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2861  // silly).
2862  auto TRI =
2863  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
2864  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2865  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2866 
2867  // Finally, we can make the call. This is just a degenerate version of a
2868  // normal ARM call node: r0 takes the address of the descriptor, and
2869  // returns the address of the variable in this thread.
2870  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2871  Chain =
2872  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2873  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2874  DAG.getRegisterMask(Mask), Chain.getValue(1));
2875  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2876 }
2877 
2878 SDValue
2879 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2880  SelectionDAG &DAG) const {
2881  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2882 
2883  SDValue Chain = DAG.getEntryNode();
2884  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2885  SDLoc DL(Op);
2886 
2887  // Load the current TEB (thread environment block)
2888  SDValue Ops[] = {Chain,
2889  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2890  DAG.getConstant(15, DL, MVT::i32),
2891  DAG.getConstant(0, DL, MVT::i32),
2892  DAG.getConstant(13, DL, MVT::i32),
2893  DAG.getConstant(0, DL, MVT::i32),
2894  DAG.getConstant(2, DL, MVT::i32)};
2895  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2896  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2897 
2898  SDValue TEB = CurrentTEB.getValue(0);
2899  Chain = CurrentTEB.getValue(1);
2900 
2901  // Load the ThreadLocalStoragePointer from the TEB
2902  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2903  SDValue TLSArray =
2904  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2905  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2906 
2907  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2908  // offset into the TLSArray.
2909 
2910  // Load the TLS index from the C runtime
2911  SDValue TLSIndex =
2912  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2913  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2914  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2915 
2916  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2917  DAG.getConstant(2, DL, MVT::i32));
2918  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2919  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2920  MachinePointerInfo());
2921 
2922  // Get the offset of the start of the .tls section (section base)
2923  const auto *GA = cast<GlobalAddressSDNode>(Op);
2924  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2925  SDValue Offset = DAG.getLoad(
2926  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2927  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2928  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2929 
2930  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2931 }
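// Putting the steps above together, the computed address is roughly:
//   TEB      = mrc p15, 0, <reg>, c13, c0, 2   (thread environment block)
//   TLSArray = *(TEB + 0x2c)
//   slot     = *(TLSArray + 4 * _tls_index)
//   address  = slot + SECREL(variable)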
2932 
2933 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2934 SDValue
2935 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2936  SelectionDAG &DAG) const {
2937  SDLoc dl(GA);
2938  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2939  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2940  MachineFunction &MF = DAG.getMachineFunction();
2941  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2942  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2943  ARMConstantPoolValue *CPV =
2944  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2945  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2946  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2947  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2948  Argument = DAG.getLoad(
2949  PtrVT, dl, DAG.getEntryNode(), Argument,
2950  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2951  SDValue Chain = Argument.getValue(1);
2952 
2953  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2954  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2955 
2956  // call __tls_get_addr.
2957  ArgListTy Args;
2958  ArgListEntry Entry;
2959  Entry.Node = Argument;
2960  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2961  Args.push_back(Entry);
2962 
2963  // FIXME: is there useful debug info available here?
2964  TargetLowering::CallLoweringInfo CLI(DAG);
2965  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2966  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2967  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2968 
2969  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2970  return CallResult.first;
2971 }
2972 
2973 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2974 // "local exec" model.
2975 SDValue
2976 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2977  SelectionDAG &DAG,
2978  TLSModel::Model model) const {
2979  const GlobalValue *GV = GA->getGlobal();
2980  SDLoc dl(GA);
2981  SDValue Offset;
2982  SDValue Chain = DAG.getEntryNode();
2983  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2984  // Get the Thread Pointer
2985  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2986 
2987  if (model == TLSModel::InitialExec) {
2988  MachineFunction &MF = DAG.getMachineFunction();
2989  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2990  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2991  // Initial exec model.
2992  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2993  ARMConstantPoolValue *CPV =
2994  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2995  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2996  true);
2997  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2998  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2999  Offset = DAG.getLoad(
3000  PtrVT, dl, Chain, Offset,
3001  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3002  Chain = Offset.getValue(1);
3003 
3004  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3005  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3006 
3007  Offset = DAG.getLoad(
3008  PtrVT, dl, Chain, Offset,
3009  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3010  } else {
3011  // local exec model
3012  assert(model == TLSModel::LocalExec);
3013  ARMConstantPoolValue *CPV =
3014  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3015  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3016  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3017  Offset = DAG.getLoad(
3018  PtrVT, dl, Chain, Offset,
3019  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3020  }
3021 
3022  // The address of the thread local variable is the add of the thread
3023  // pointer with the offset of the variable.
3024  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3025 }
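// In short: the initial-exec path above loads the variable's offset from the
// thread pointer indirectly through a GOTTPOFF constant-pool entry (an extra
// load), while the local-exec path folds the offset into a TPOFF constant-pool
// entry; both finish by adding the offset to ARMISD::THREAD_POINTER.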
3026 
3027 SDValue
3028 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3029  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3030  if (DAG.getTarget().useEmulatedTLS())
3031  return LowerToTLSEmulatedModel(GA, DAG);
3032 
3033  if (Subtarget->isTargetDarwin())
3034  return LowerGlobalTLSAddressDarwin(Op, DAG);
3035 
3036  if (Subtarget->isTargetWindows())
3037  return LowerGlobalTLSAddressWindows(Op, DAG);
3038 
3039  // TODO: implement the "local dynamic" model
3040  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3041  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3042 
3043  switch (model) {
3044  case TLSModel::GeneralDynamic:
3045  case TLSModel::LocalDynamic:
3046  return LowerToTLSGeneralDynamicModel(GA, DAG);
3047  case TLSModel::InitialExec:
3048  case TLSModel::LocalExec:
3049  return LowerToTLSExecModels(GA, DAG, model);
3050  }
3051  llvm_unreachable("bogus TLS model");
3052 }
3053 
3054 /// Return true if all users of V are within function F, looking through
3055 /// ConstantExprs.
3056 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3057  SmallVector<const User*,4> Worklist;
3058  for (auto *U : V->users())
3059  Worklist.push_back(U);
3060  while (!Worklist.empty()) {
3061  auto *U = Worklist.pop_back_val();
3062  if (isa<ConstantExpr>(U)) {
3063  for (auto *UU : U->users())
3064  Worklist.push_back(UU);
3065  continue;
3066  }
3067 
3068  auto *I = dyn_cast<Instruction>(U);
3069  if (!I || I->getParent()->getParent() != F)
3070  return false;
3071  }
3072  return true;
3073 }
3074 
3075 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3076  const GlobalValue *GV, SelectionDAG &DAG,
3077  EVT PtrVT, const SDLoc &dl) {
3078  // If we're creating a pool entry for a constant global with unnamed address,
3079  // and the global is small enough, we can emit it inline into the constant pool
3080  // to save ourselves an indirection.
3081  //
3082  // This is a win if the constant is only used in one function (so it doesn't
3083  // need to be duplicated) or duplicating the constant wouldn't increase code
3084  // size (implying the constant is no larger than 4 bytes).
3085  const Function &F = DAG.getMachineFunction().getFunction();
3086 
3087  // We rely on this decision to inline being idempotent and unrelated to the
3088  // use-site. We know that if we inline a variable at one use site, we'll
3089  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3090  // doesn't know about this optimization, so bail out if it's enabled;
3091  // otherwise we could decide to inline here (and thus never emit the GV)
3092  // while fast-isel generated code still requires the GV.
3093  if (!EnableConstpoolPromotion ||
3094  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3095  return SDValue();
3096 
3097  auto *GVar = dyn_cast<GlobalVariable>(GV);
3098  if (!GVar || !GVar->hasInitializer() ||
3099  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3100  !GVar->hasLocalLinkage())
3101  return SDValue();
3102 
3103  // If we inline a value that contains relocations, we move the relocations
3104  // from .data to .text. This is not allowed in position-independent code.
3105  auto *Init = GVar->getInitializer();
3106  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3107  Init->needsRelocation())
3108  return SDValue();
3109 
3110  // The constant islands pass can only really deal with alignment requests
3111  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3112  // any type requiring alignment greater than 4 bytes. We also can only
3113  // promote constants that are multiples of 4 bytes in size, or that can be
3114  // padded to a multiple of 4. Currently we only try to pad constants that
3115  // are strings, for simplicity.
3116  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3117  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3118  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3119  unsigned RequiredPadding = 4 - (Size % 4);
3120  bool PaddingPossible =
3121  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3122  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3123  Size == 0)
3124  return SDValue();
3125 
3126  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3127  MachineFunction &MF = DAG.getMachineFunction();
3128  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3129 
3130  // We can't bloat the constant pool too much, else the ConstantIslands pass
3131  // may fail to converge. If we haven't promoted this global yet (it may have
3132  // multiple uses), and promoting it would increase the constant pool size (Sz
3133  // > 4), ensure we have space to do so up to MaxTotal.
3134  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3135  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3136  ConstpoolPromotionMaxTotal)
3137  return SDValue();
3138 
3139  // This is only valid if all users are in a single function; we can't clone
3140  // the constant in general. The LLVM IR unnamed_addr allows merging
3141  // constants, but not cloning them.
3142  //
3143  // We could potentially allow cloning if we could prove all uses of the
3144  // constant in the current function don't care about the address, like
3145  // printf format strings. But that isn't implemented for now.
3146  if (!allUsersAreInFunction(GVar, &F))
3147  return SDValue();
3148 
3149  // We're going to inline this global. Pad it out if needed.
3150  if (RequiredPadding != 4) {
3151  StringRef S = CDAInit->getAsString();
3152 
3153  SmallVector<uint8_t,16> V(S.size());
3154  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3155  while (RequiredPadding--)
3156  V.push_back(0);
3157  Init = ConstantDataArray::get(*DAG.getContext(), V);
3158  }
3159 
3160  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3161  SDValue CPAddr =
3162  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3163  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3164  AFI->markGlobalAsPromotedToConstantPool(GVar);
3165  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3166  PaddedSize - 4);
3167  }
3168  ++NumConstpoolPromoted;
3169  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3170 }
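// Illustrative candidate (hypothetical global): a small local string such as
//   @.str = private unnamed_addr constant [6 x i8] c"hello\00"
// that is used in only one function can be emitted directly into that
// function's constant pool, padded to 8 bytes, saving the usual indirection
// through a separate global address.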
3171 
3172 static bool isReadOnly(const GlobalValue *GV) {
3173  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3174  GV = GA->getBaseObject();
3175  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3176  isa<Function>(GV);
3177 }
3178 
3179 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3180  SelectionDAG &DAG) const {
3181  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3182  default: llvm_unreachable("unknown object format");
3183  case Triple::COFF:
3184  return LowerGlobalAddressWindows(Op, DAG);
3185  case Triple::ELF:
3186  return LowerGlobalAddressELF(Op, DAG);
3187  case Triple::MachO:
3188  return LowerGlobalAddressDarwin(Op, DAG);
3189  }
3190 }
3191 
3192 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3193  SelectionDAG &DAG) const {
3194  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3195  SDLoc dl(Op);
3196  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3197  const TargetMachine &TM = getTargetMachine();
3198  bool IsRO = isReadOnly(GV);
3199 
3200  // promoteToConstantPool only if not generating XO text section
3201  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3202  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3203  return V;
3204 
3205  if (isPositionIndependent()) {
3206  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3207  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3208  UseGOT_PREL ? ARMII::MO_GOT : 0);
3209  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3210  if (UseGOT_PREL)
3211  Result =
3212  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3214  return Result;
3215  } else if (Subtarget->isROPI() && IsRO) {
3216  // PC-relative.
3217  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3218  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3219  return Result;
3220  } else if (Subtarget->isRWPI() && !IsRO) {
3221  // SB-relative.
3222  SDValue RelAddr;
3223  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3224  ++NumMovwMovt;
3225  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3226  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3227  } else { // use literal pool for address constant
3228  ARMConstantPoolValue *CPV =
3229  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3230  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3231  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3232  RelAddr = DAG.getLoad(
3233  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3234  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3235  }
3236  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3237  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3238  return Result;
3239  }
3240 
3241  // If we have T2 ops, we can materialize the address directly via movt/movw
3242  // pair. This is always cheaper.
3243  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3244  ++NumMovwMovt;
3245  // FIXME: Once remat is capable of dealing with instructions with register
3246  // operands, expand this into two nodes.
3247  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3248  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3249  } else {
3250  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3251  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3252  return DAG.getLoad(
3253  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3254  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3255  }
3256 }
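// Illustrative codegen for a hypothetical global @g: with movw/movt available
// the address is materialized directly as
//   movw r0, :lower16:g
//   movt r0, :upper16:g
// otherwise it is loaded from a literal pool entry, e.g. "ldr r0, .LCPI0_0".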
3257 
3258 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3259  SelectionDAG &DAG) const {
3260  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3261  "ROPI/RWPI not currently supported for Darwin");
3262  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3263  SDLoc dl(Op);
3264  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3265 
3266  if (Subtarget->useMovt(DAG.getMachineFunction()))
3267  ++NumMovwMovt;
3268 
3269  // FIXME: Once remat is capable of dealing with instructions with register
3270  // operands, expand this into multiple nodes
3271  unsigned Wrapper =
3272  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3273 
3274  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3275  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3276 
3277  if (Subtarget->isGVIndirectSymbol(GV))
3278  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3279  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3280  return Result;
3281 }
3282 
3283 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3284  SelectionDAG &DAG) const {
3285  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3286  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3287  "Windows on ARM expects to use movw/movt");
3288  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3289  "ROPI/RWPI not currently supported for Windows");
3290 
3291  const TargetMachine &TM = getTargetMachine();
3292  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3293  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3294  if (GV->hasDLLImportStorageClass())
3295  TargetFlags = ARMII::MO_DLLIMPORT;
3296  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3297  TargetFlags = ARMII::MO_COFFSTUB;
3298  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3299  SDValue Result;
3300  SDLoc DL(Op);
3301 
3302  ++NumMovwMovt;
3303 
3304  // FIXME: Once remat is capable of dealing with instructions with register
3305  // operands, expand this into two nodes.
3306  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3307  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3308  TargetFlags));
3309  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3310  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3311  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3312  return Result;
3313 }
3314 
3315 SDValue
3316 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3317  SDLoc dl(Op);
3318  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3319  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3320  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3321  Op.getOperand(1), Val);
3322 }
3323 
3324 SDValue
3325 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3326  SDLoc dl(Op);
3327  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3328  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3329 }
3330 
3331 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3332  SelectionDAG &DAG) const {
3333  SDLoc dl(Op);
3334  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3335  Op.getOperand(0));
3336 }
3337 
3338 SDValue
3339 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3340  const ARMSubtarget *Subtarget) const {
3341  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3342  SDLoc dl(Op);
3343  switch (IntNo) {
3344  default: return SDValue(); // Don't custom lower most intrinsics.
3345  case Intrinsic::thread_pointer: {
3346  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3347  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3348  }
3349  case Intrinsic::eh_sjlj_lsda: {
3350  MachineFunction &MF = DAG.getMachineFunction();
3351  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3352  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3353  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3354  SDValue CPAddr;
3355  bool IsPositionIndependent = isPositionIndependent();
3356  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3357  ARMConstantPoolValue *CPV =
3358  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3359  ARMCP::CPLSDA, PCAdj);
3360  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3361  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3362  SDValue Result = DAG.getLoad(
3363  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3364  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3365 
3366  if (IsPositionIndependent) {
3367  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3368  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3369  }
3370  return Result;
3371  }
3372  case Intrinsic::arm_neon_vabs:
3373  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3374  Op.getOperand(1));
3375  case Intrinsic::arm_neon_vmulls:
3376  case Intrinsic::arm_neon_vmullu: {
3377  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3378  ? ARMISD::VMULLs : ARMISD::VMULLu;
3379  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3380  Op.getOperand(1), Op.getOperand(2));
3381  }
3382  case Intrinsic::arm_neon_vminnm:
3383  case Intrinsic::arm_neon_vmaxnm: {
3384  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3385  ? ISD::FMINNUM : ISD::FMAXNUM;
3386  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3387  Op.getOperand(1), Op.getOperand(2));
3388  }
3389  case Intrinsic::arm_neon_vminu:
3390  case Intrinsic::arm_neon_vmaxu: {
3391  if (Op.getValueType().isFloatingPoint())
3392  return SDValue();
3393  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3394  ? ISD::UMIN : ISD::UMAX;
3395  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3396  Op.getOperand(1), Op.getOperand(2));
3397  }
3398  case Intrinsic::arm_neon_vmins:
3399  case Intrinsic::arm_neon_vmaxs: {
3400  // v{min,max}s is overloaded between signed integers and floats.
3401  if (!Op.getValueType().isFloatingPoint()) {
3402  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3403  ? ISD::SMIN : ISD::SMAX;
3404  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3405  Op.getOperand(1), Op.getOperand(2));
3406  }
3407  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3408  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3409  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3410  Op.getOperand(1), Op.getOperand(2));
3411  }
3412  case Intrinsic::arm_neon_vtbl1:
3413  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3414  Op.getOperand(1), Op.getOperand(2));
3415  case Intrinsic::arm_neon_vtbl2:
3416  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3417  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3418  }
3419 }
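// Rough illustration of the overloaded NEON min/max handling above (assuming
// the standard NEON intrinsic naming): a call such as
//   @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b)
// should be rewritten to the generic ISD::SMIN node, while the same intrinsic
// instantiated on <4 x float> is lowered through the floating-point branch
// above instead.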
3420 
3421 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3422  const ARMSubtarget *Subtarget) {
3423  SDLoc dl(Op);
3424  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3425  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3426  if (SSID == SyncScope::SingleThread)
3427  return Op;
3428 
3429  if (!Subtarget->hasDataBarrier()) {
3430  // Some ARMv6 cpus can support data barriers with an mcr instruction.
3431  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3432  // here.
3433  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3434  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3435  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3436  DAG.getConstant(0, dl, MVT::i32));
3437  }
3438 
3439  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3440  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3441  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3442  if (Subtarget->isMClass()) {
3443  // Only a full system barrier exists in the M-class architectures.
3444  Domain = ARM_MB::SY;
3445  } else if (Subtarget->preferISHSTBarriers() &&
3446  Ord == AtomicOrdering::Release) {
3447  // Swift happens to implement ISHST barriers in a way that's compatible with
3448  // Release semantics but weaker than ISH so we'd be fools not to use
3449  // it. Beware: other processors probably don't!
3450  Domain = ARM_MB::ISHST;
3451  }
3452 
3453  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3454  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3455  DAG.getConstant(Domain, dl, MVT::i32));
3456 }
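// For illustration (not exhaustive): when data barriers are available, an IR
// "fence seq_cst" reaches this point as ISD::ATOMIC_FENCE and should come out
// as "dmb ish" on A- and R-class cores, while M-class cores only have the
// full-system domain and therefore get "dmb sy" instead.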
3457 
3458 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3459  const ARMSubtarget *Subtarget) {
3460  // ARM pre v5TE and Thumb1 do not have preload instructions.
3461  if (!(Subtarget->isThumb2() ||
3462  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3463  // Just preserve the chain.
3464  return Op.getOperand(0);
3465 
3466  SDLoc dl(Op);
3467  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3468  if (!isRead &&
3469  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3470  // ARMv7 with MP extension has PLDW.
3471  return Op.getOperand(0);
3472 
3473  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3474  if (Subtarget->isThumb()) {
3475  // Invert the bits.
3476  isRead = ~isRead & 1;
3477  isData = ~isData & 1;
3478  }
3479 
3480  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3481  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3482  DAG.getConstant(isData, dl, MVT::i32));
3483 }
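// For illustration: a write prefetch (@llvm.prefetch with rw = 1) is only kept
// when the target actually has PLDW, i.e. ARMv7 with the MP extension; on
// anything older the node is dropped and only its chain is preserved, exactly
// as the early returns above do.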
3484 
3485 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3486  MachineFunction &MF = DAG.getMachineFunction();
3487  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3488 
3489  // vastart just stores the address of the VarArgsFrameIndex slot into the
3490  // memory location argument.
3491  SDLoc dl(Op);
3492  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3493  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3494  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3495  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3496  MachinePointerInfo(SV));
3497 }
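// Minimal sketch of the result: for a variadic function, va_start becomes a
// single store of the VarArgsFrameIndex frame address into the va_list slot,
// so later va_arg expansion simply walks memory starting from the frame index
// recorded by VarArgStyleRegisters below.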
3498 
3499 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3500  CCValAssign &NextVA,
3501  SDValue &Root,
3502  SelectionDAG &DAG,
3503  const SDLoc &dl) const {
3504  MachineFunction &MF = DAG.getMachineFunction();
3505  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3506 
3507  const TargetRegisterClass *RC;
3508  if (AFI->isThumb1OnlyFunction())
3509  RC = &ARM::tGPRRegClass;
3510  else
3511  RC = &ARM::GPRRegClass;
3512 
3513  // Transform the arguments stored in physical registers into virtual ones.
3514  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3515  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3516 
3517  SDValue ArgValue2;
3518  if (NextVA.isMemLoc()) {
3519  MachineFrameInfo &MFI = MF.getFrameInfo();
3520  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3521 
3522  // Create load node to retrieve arguments from the stack.
3523  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3524  ArgValue2 = DAG.getLoad(
3525  MVT::i32, dl, Root, FIN,
3526  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3527  } else {
3528  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3529  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3530  }
3531  if (!Subtarget->isLittle())
3532  std::swap (ArgValue, ArgValue2);
3533  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3534 }
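// For illustration (assuming the soft-float AAPCS rules for f64 arguments): an
// f64 split across two core registers, or across one register and a stack
// slot, is reassembled here with a single VMOVDRR, swapping the two halves
// first on big-endian subtargets.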
3535 
3536 // The remaining GPRs hold either the beginning of variable-argument
3537 // data, or the beginning of an aggregate passed by value (usually
3538 // byval). Either way, we allocate stack slots adjacent to the data
3539 // provided by our caller, and store the unallocated registers there.
3540 // If this is a variadic function, the va_list pointer will begin with
3541 // these values; otherwise, this reassembles a (byval) structure that
3542 // was split between registers and memory.
3543 // Return: The frame index that the registers were stored into.
3544 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3545  const SDLoc &dl, SDValue &Chain,
3546  const Value *OrigArg,
3547  unsigned InRegsParamRecordIdx,
3548  int ArgOffset, unsigned ArgSize) const {
3549  // Currently, two use cases are possible:
3550  // Case #1. Non-vararg function, and we meet the first byval parameter.
3551  // Set up the first unallocated register as the first byval register and
3552  // consume all remaining registers
3553  // (these two actions are performed by the HandleByVal method).
3554  // Then, here, we initialize the stack frame with
3555  // "store-reg" instructions.
3556  // Case #2. Vararg function that doesn't contain byval parameters.
3557  // The same: consume all remaining unallocated registers and
3558  // initialize the stack frame.
3559 
3560  MachineFunction &MF = DAG.getMachineFunction();
3561  MachineFrameInfo &MFI = MF.getFrameInfo();
3562  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3563  unsigned RBegin, REnd;
3564  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3565  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3566  } else {
3567  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3568  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3569  REnd = ARM::R4;
3570  }
3571 
3572  if (REnd != RBegin)
3573  ArgOffset = -4 * (ARM::R4 - RBegin);
3574 
3575  auto PtrVT = getPointerTy(DAG.getDataLayout());
3576  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3577  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3578 
3579  SmallVector<SDValue, 4> MemOps;
3580  const TargetRegisterClass *RC =
3581  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3582 
3583  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3584  unsigned VReg = MF.addLiveIn(Reg, RC);
3585  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3586  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3587  MachinePointerInfo(OrigArg, 4 * i));
3588  MemOps.push_back(Store);
3589  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3590  }
3591 
3592  if (!MemOps.empty())
3593  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3594  return FrameIndex;
3595 }
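// Hypothetical worked example: for "void f(int a, struct S s)" where S is 12
// bytes and passed byval, HandleByVal may assign r1-r3 to s; this routine then
// spills r1-r3 into a fixed stack object placed just below the incoming stack
// arguments so that the whole aggregate ends up contiguous in memory.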
3596 
3597 // Set up the stack frame that the va_list pointer will start from.
3598 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3599  const SDLoc &dl, SDValue &Chain,
3600  unsigned ArgOffset,
3601  unsigned TotalArgRegsSaveSize,
3602  bool ForceMutable) const {
3603  MachineFunction &MF = DAG.getMachineFunction();
3604  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3605 
3606  // Try to store any remaining integer argument regs
3607  // to their spots on the stack so that they may be loaded by dereferencing
3608  // the result of va_next.
3609  // If there are no regs to be stored, just point the address after the last
3610  // argument passed via the stack.
3611  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3612  CCInfo.getInRegsParamsCount(),
3613  CCInfo.getNextStackOffset(), 4);
3614  AFI->setVarArgsFrameIndex(FrameIndex);
3615 }
3616 
3617 SDValue ARMTargetLowering::LowerFormalArguments(
3618  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3619  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3620  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3621  MachineFunction &MF = DAG.getMachineFunction();
3622  MachineFrameInfo &MFI = MF.getFrameInfo();
3623 
3624  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3625 
3626  // Assign locations to all of the incoming arguments.
3627  SmallVector<CCValAssign, 16> ArgLocs;
3628  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3629  *DAG.getContext());
3630  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3631 
3632  SmallVector<SDValue, 16> ArgValues;
3633  SDValue ArgValue;
3634  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3635  unsigned CurArgIdx = 0;
3636 
3637  // Initially ArgRegsSaveSize is zero.
3638  // Then we increase this value each time we meet a byval parameter.
3639  // We also increase this value in the case of a varargs function.
3640  AFI->setArgRegsSaveSize(0);
3641 
3642  // Calculate the amount of stack space that we need to allocate to store
3643  // byval and variadic arguments that are passed in registers.
3644  // We need to know this before we allocate the first byval or variadic
3645  // argument, as they will be allocated a stack slot below the CFA (Canonical
3646  // Frame Address, the stack pointer at entry to the function).
3647  unsigned ArgRegBegin = ARM::R4;
3648  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3649  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3650  break;
3651 
3652  CCValAssign &VA = ArgLocs[i];
3653  unsigned Index = VA.getValNo();
3654  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3655  if (!Flags.isByVal())
3656  continue;
3657 
3658  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3659  unsigned RBegin, REnd;
3660  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3661  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3662 
3663  CCInfo.nextInRegsParam();
3664  }
3665  CCInfo.rewindByValRegsInfo();
3666 
3667  int lastInsIndex = -1;
3668  if (isVarArg && MFI.hasVAStart()) {
3669  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3670  if (RegIdx != array_lengthof(GPRArgRegs))
3671  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3672  }
3673 
3674  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3675  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3676  auto PtrVT = getPointerTy(DAG.getDataLayout());
3677 
3678  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3679  CCValAssign &VA = ArgLocs[i];
3680  if (Ins[VA.getValNo()].isOrigArg()) {
3681  std::advance(CurOrigArg,
3682  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3683  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3684  }
3685  // Arguments stored in registers.
3686  if (VA.isRegLoc()) {
3687  EVT RegVT = VA.getLocVT();
3688 
3689  if (VA.needsCustom()) {
3690  // f64 and vector types are split up into multiple registers or
3691  // combinations of registers and stack slots.
3692  if (VA.getLocVT() == MVT::v2f64) {
3693  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3694  Chain, DAG, dl);
3695  VA = ArgLocs[++i]; // skip ahead to next loc
3696  SDValue ArgValue2;
3697  if (VA.isMemLoc()) {
3698  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3699  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3700  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3701  MachinePointerInfo::getFixedStack(
3702  DAG.getMachineFunction(), FI));
3703  } else {
3704  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3705  Chain, DAG, dl);
3706  }
3707  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3708  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3709  ArgValue, ArgValue1,
3710  DAG.getIntPtrConstant(0, dl));
3711  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3712  ArgValue, ArgValue2,
3713  DAG.getIntPtrConstant(1, dl));
3714  } else
3715  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3716  } else {
3717  const TargetRegisterClass *RC;
3718 
3719 
3720  if (RegVT == MVT::f16)
3721  RC = &ARM::HPRRegClass;
3722  else if (RegVT == MVT::f32)
3723  RC = &ARM::SPRRegClass;
3724  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3725  RC = &ARM::DPRRegClass;
3726  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3727  RC = &ARM::QPRRegClass;
3728  else if (RegVT == MVT::i32)
3729  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3730  : &ARM::GPRRegClass;
3731  else
3732  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3733 
3734  // Transform the arguments in physical registers into virtual ones.
3735  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3736  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3737  }
3738 
3739  // If this is an 8 or 16-bit value, it is really passed promoted
3740  // to 32 bits. Insert an assert[sz]ext to capture this, then
3741  // truncate to the right size.
3742  switch (VA.getLocInfo()) {
3743  default: llvm_unreachable("Unknown loc info!");
3744  case CCValAssign::Full: break;
3745  case CCValAssign::BCvt:
3746  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3747  break;
3748  case CCValAssign::SExt:
3749  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3750  DAG.getValueType(VA.getValVT()));
3751  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3752  break;
3753  case CCValAssign::ZExt:
3754  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3755  DAG.getValueType(VA.getValVT()));
3756  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3757  break;
3758  }
3759 
3760  InVals.push_back(ArgValue);
3761  } else { // VA.isRegLoc()
3762  // sanity check
3763  assert(VA.isMemLoc());
3764  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3765 
3766  int index = VA.getValNo();
3767 
3768  // Some Ins[] entries become multiple ArgLoc[] entries.
3769  // Process them only once.
3770  if (index != lastInsIndex)
3771  {
3772  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3773  // FIXME: For now, all byval parameter objects are marked mutable.
3774  // This can be changed with more analysis.
3775  // In case of tail call optimization mark all arguments mutable.
3776  // Since they could be overwritten by lowering of arguments in case of
3777  // a tail call.
3778  if (Flags.isByVal()) {
3779  assert(Ins[index].isOrigArg() &&
3780  "Byval arguments cannot be implicit");
3781  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3782 
3783  int FrameIndex = StoreByValRegs(
3784  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3785  VA.getLocMemOffset(), Flags.getByValSize());
3786  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3787  CCInfo.nextInRegsParam();
3788  } else {
3789  unsigned FIOffset = VA.getLocMemOffset();
3790  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3791  FIOffset, true);
3792 
3793  // Create load nodes to retrieve arguments from the stack.
3794  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3795  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3796  MachinePointerInfo::getFixedStack(
3797  DAG.getMachineFunction(), FI)));
3798  }
3799  lastInsIndex = index;
3800  }
3801  }
3802  }
3803 
3804  // varargs
3805  if (isVarArg && MFI.hasVAStart())
3806  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3807  CCInfo.getNextStackOffset(),
3808  TotalArgRegsSaveSize);
3809 
3810  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3811 
3812  return Chain;
3813 }
3814 
3815 /// isFloatingPointZero - Return true if this is +0.0.
3816 static bool isFloatingPointZero(SDValue Op) {
3817  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3818  return CFP->getValueAPF().isPosZero();
3819  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3820  // Maybe this has already been legalized into the constant pool?
3821  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3822  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3823  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3824  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3825  return CFP->getValueAPF().isPosZero();
3826  }
3827  } else if (Op->getOpcode() == ISD::BITCAST &&
3828  Op->getValueType(0) == MVT::f64) {
3829  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3830  // created by LowerConstantFP().
3831  SDValue BitcastOp = Op->getOperand(0);
3832  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3833  isNullConstant(BitcastOp->getOperand(0)))
3834  return true;
3835  }
3836  return false;
3837 }
3838 
3839 /// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
3840 /// the given operands.
3841 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3842  SDValue &ARMcc, SelectionDAG &DAG,
3843  const SDLoc &dl) const {
3844  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3845  unsigned C = RHSC->getZExtValue();
3846  if (!isLegalICmpImmediate((int32_t)C)) {
3847  // Constant does not fit, try adjusting it by one.
3848  switch (CC) {
3849  default: break;
3850  case ISD::SETLT:
3851  case ISD::SETGE:
3852  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3853  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3854  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3855  }
3856  break;
3857  case ISD::SETULT:
3858  case ISD::SETUGE:
3859  if (C != 0 && isLegalICmpImmediate(C-1)) {
3860  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3861  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3862  }
3863  break;
3864  case ISD::SETLE:
3865  case ISD::SETGT:
3866  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3867  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3868  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3869  }
3870  break;
3871  case ISD::SETULE:
3872  case ISD::SETUGT:
3873  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3874  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3875  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3876  }
3877  break;
3878  }
3879  }
3880  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3881  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3882  // In ARM and Thumb-2, the compare instructions can shift their second
3883  // operand.
3884  CC = ISD::getSetCCSwappedOperands(CC);
3885  std::swap(LHS, RHS);
3886  }
3887 
3888  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3889  ARMISD::NodeType CompareType;
3890  switch (CondCode) {
3891  default:
3892  CompareType = ARMISD::CMP;
3893  break;
3894  case ARMCC::EQ:
3895  case ARMCC::NE:
3896  // Uses only Z Flag
3897  CompareType = ARMISD::CMPZ;
3898  break;
3899  }
3900  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3901  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3902 }
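// Example of the immediate adjustment above (assuming the usual ARM modified
// immediate encoding): "x u< 0x101" cannot encode 0x101 directly, but it can
// be rewritten as "x u<= 0x100", and 0x100 is a legal compare immediate, so
// the condition is relaxed and the constant decremented instead of
// materializing 0x101 in a register.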
3903 
3904 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3905 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3906  SelectionDAG &DAG, const SDLoc &dl,
3907  bool InvalidOnQNaN) const {
3908  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3909  SDValue Cmp;
3910  SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3911  if (!isFloatingPointZero(RHS))
3912  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3913  else
3914  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3915  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3916 }
3917 
3918 /// duplicateCmp - Glue values can have only one use, so this function
3919 /// duplicates a comparison node.
3920 SDValue
3921 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3922  unsigned Opc = Cmp.getOpcode();
3923  SDLoc DL(Cmp);
3924  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3925  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3926 
3927  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3928  Cmp = Cmp.getOperand(0);
3929  Opc = Cmp.getOpcode();
3930  if (Opc == ARMISD::CMPFP)
3931  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3932  Cmp.getOperand(1), Cmp.getOperand(2));
3933  else {
3934  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3935  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3936  Cmp.getOperand(1));
3937  }
3938  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3939 }
3940 
3941 // This function returns three things: the arithmetic computation itself
3942 // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
3943 // comparison and the condition code define the case in which the arithmetic
3944 // computation *does not* overflow.
3945 std::pair<SDValue, SDValue>
3946 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3947  SDValue &ARMcc) const {
3948  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3949 
3950  SDValue Value, OverflowCmp;
3951  SDValue LHS = Op.getOperand(0);
3952  SDValue RHS = Op.getOperand(1);
3953  SDLoc dl(Op);
3954 
3955  // FIXME: We are currently always generating CMPs because we don't support
3956  // generating CMN through the backend. This is not as good as the natural
3957  // CMP case because it causes a register dependency and cannot be folded
3958  // later.
3959 
3960  switch (Op.getOpcode()) {
3961  default:
3962  llvm_unreachable("Unknown overflow instruction!");
3963  case ISD::SADDO:
3964  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3965  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3966  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3967  break;
3968  case ISD::UADDO:
3969  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3970  // We use ADDC here to correspond to its use in LowerUnsignedALUO.
3971  // We do not use it in the USUBO case as Value may not be used.
3972  Value = DAG.getNode(ARMISD::ADDC, dl,
3973  DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
3974  .getValue(0);
3975  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3976  break;
3977  case ISD::SSUBO:
3978  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3979  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3980  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3981  break;
3982  case ISD::USUBO:
3983  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3984  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3985  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3986  break;
3987  case ISD::UMULO:
3988  // We generate a UMUL_LOHI and then check if the high word is 0.
3989  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
3990  Value = DAG.getNode(ISD::UMUL_LOHI, dl,
3991  DAG.getVTList(Op.getValueType(), Op.getValueType()),
3992  LHS, RHS);
3993  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
3994  DAG.getConstant(0, dl, MVT::i32));
3995  Value = Value.getValue(0); // We only want the low 32 bits for the result.
3996  break;
3997  case ISD::SMULO:
3998  // We generate a SMUL_LOHI and then check if all the bits of the high word
3999  // are the same as the sign bit of the low word.
4000  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4001  Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4002  DAG.getVTList(Op.getValueType(), Op.getValueType()),
4003  LHS, RHS);
4004  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4005  DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4006  Value.getValue(0),
4007  DAG.getConstant(31, dl, MVT::i32)));
4008  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4009  break;
4010  } // switch (...)
4011 
4012  return std::make_pair(Value, OverflowCmp);
4013 }
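// For illustration, the pair produced above for ISD::SADDO is
//   Value       = ADD LHS, RHS
//   OverflowCmp = CMP Value, LHS      (with ARMcc = VC)
// so the caller selects on VC ("overflow clear") to pick the case in which the
// addition did not overflow, as described in the comment before this function.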
4014 
4015 SDValue
4016 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4017  // Let legalize expand this if it isn't a legal type yet.
4018  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4019  return SDValue();
4020 
4021  SDValue Value, OverflowCmp;
4022  SDValue ARMcc;
4023  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4024  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4025  SDLoc dl(Op);
4026  // We use 0 and 1 as false and true values.
4027  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4028  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4029  EVT VT = Op.getValueType();
4030 
4031  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4032  ARMcc, CCR, OverflowCmp);
4033 
4034  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4035  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4036 }
4037