ARMISelLowering.cpp
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
26 #include "Utils/ARMBaseInfo.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/BitVector.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringExtras.h"
37 #include "llvm/ADT/StringRef.h"
38 #include "llvm/ADT/StringSwitch.h"
39 #include "llvm/ADT/Triple.h"
40 #include "llvm/ADT/Twine.h"
64 #include "llvm/IR/Attributes.h"
65 #include "llvm/IR/CallingConv.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/Constants.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Function.h"
72 #include "llvm/IR/GlobalAlias.h"
73 #include "llvm/IR/GlobalValue.h"
74 #include "llvm/IR/GlobalVariable.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InlineAsm.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/PatternMatch.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/IR/Value.h"
86 #include "llvm/MC/MCInstrDesc.h"
88 #include "llvm/MC/MCRegisterInfo.h"
89 #include "llvm/MC/MCSchedule.h"
92 #include "llvm/Support/Casting.h"
93 #include "llvm/Support/CodeGen.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 using namespace llvm::PatternMatch;
117 
118 #define DEBUG_TYPE "arm-isel"
119 
120 STATISTIC(NumTailCalls, "Number of tail calls");
121 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
122 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
123 STATISTIC(NumConstpoolPromoted,
124  "Number of constants with their storage promoted into constant pools");
125 
126 static cl::opt<bool>
127 ARMInterworking("arm-interworking", cl::Hidden,
128  cl::desc("Enable / disable ARM interworking (for debugging only)"),
129  cl::init(true));
130 
131 static cl::opt<bool> EnableConstpoolPromotion(
132  "arm-promote-constant", cl::Hidden,
133  cl::desc("Enable / disable promotion of unnamed_addr constants into "
134  "constant pools"),
135  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
136 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
137  "arm-promote-constant-max-size", cl::Hidden,
138  cl::desc("Maximum size of constant to promote into a constant pool"),
139  cl::init(64));
140 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
141  "arm-promote-constant-max-total", cl::Hidden,
142  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
143  cl::init(128));
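// Example invocation of the flags above (illustrative only; assumes the
// standard llc driver and is not part of the upstream file):
//   llc -mtriple=armv7-linux-gnueabihf -arm-promote-constant \
//       -arm-promote-constant-max-size=32 -arm-promote-constant-max-total=128 input.ll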
144 
145 // The APCS parameter registers.
146 static const MCPhysReg GPRArgRegs[] = {
147  ARM::R0, ARM::R1, ARM::R2, ARM::R3
148 };
149 
150 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
151  MVT PromotedBitwiseVT) {
152  if (VT != PromotedLdStVT) {
153  setOperationAction(ISD::LOAD, VT, Promote);
154  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
155 
156  setOperationAction(ISD::STORE, VT, Promote);
157  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
158  }
159 
160  MVT ElemTy = VT.getVectorElementType();
161  if (ElemTy != MVT::f64)
162  setOperationAction(ISD::SETCC, VT, Custom);
163  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
164  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
165  if (ElemTy == MVT::i32) {
166  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
167  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
168  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
169  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
170  } else {
171  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
172  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
173  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
174  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
175  }
176  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
177  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
178  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
179  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
180  setOperationAction(ISD::SELECT, VT, Expand);
181  setOperationAction(ISD::SELECT_CC, VT, Expand);
182  setOperationAction(ISD::VSELECT, VT, Expand);
183  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
184  if (VT.isInteger()) {
185  setOperationAction(ISD::SHL, VT, Custom);
186  setOperationAction(ISD::SRA, VT, Custom);
187  setOperationAction(ISD::SRL, VT, Custom);
188  }
189 
190  // Promote all bit-wise operations.
191  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  setOperationAction(ISD::AND, VT, Promote);
193  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  setOperationAction(ISD::OR, VT, Promote);
195  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  setOperationAction(ISD::XOR, VT, Promote);
197  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
198  }
199 
200  // Neon does not support vector divide/remainder operations.
201  setOperationAction(ISD::SDIV, VT, Expand);
202  setOperationAction(ISD::UDIV, VT, Expand);
203  setOperationAction(ISD::FDIV, VT, Expand);
204  setOperationAction(ISD::SREM, VT, Expand);
205  setOperationAction(ISD::UREM, VT, Expand);
206  setOperationAction(ISD::FREM, VT, Expand);
207 
208  if (!VT.isFloatingPoint() &&
209  VT != MVT::v2i64 && VT != MVT::v1i64)
210  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211  setOperationAction(Opcode, VT, Legal);
212 }
213 
214 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
215  addRegisterClass(VT, &ARM::DPRRegClass);
216  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
217 }
218 
219 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
220  addRegisterClass(VT, &ARM::DPairRegClass);
221  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
222 }
223 
224 void ARMTargetLowering::setAllExpand(MVT VT) {
225  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
226  setOperationAction(Opc, VT, Expand);
227 
228  // We support these really simple operations even on types where all
229  // the actual arithmetic has to be broken down into simpler
230  // operations or turned into library calls.
231  setOperationAction(ISD::BITCAST, VT, Legal);
232  setOperationAction(ISD::LOAD, VT, Legal);
233  setOperationAction(ISD::STORE, VT, Legal);
234  setOperationAction(ISD::UNDEF, VT, Legal);
235 }
236 
237 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
238  LegalizeAction Action) {
239  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
240  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
241  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
242 }
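// For example, addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal) marks the any-,
// zero- and sign-extending loads from v8i8 into v8i16 as legal in one call.
// (Comment added for exposition.)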
243 
244 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
245  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
246 
247  for (auto VT : IntTypes) {
248  addRegisterClass(VT, &ARM::QPRRegClass);
249  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
250  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
251  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
252  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
253  setOperationAction(ISD::SHL, VT, Custom);
254  setOperationAction(ISD::SRA, VT, Custom);
255  setOperationAction(ISD::SRL, VT, Custom);
256  setOperationAction(ISD::SMIN, VT, Legal);
257  setOperationAction(ISD::SMAX, VT, Legal);
258  setOperationAction(ISD::UMIN, VT, Legal);
259  setOperationAction(ISD::UMAX, VT, Legal);
260  setOperationAction(ISD::ABS, VT, Legal);
261 
262  // No native support for these.
263  setOperationAction(ISD::UDIV, VT, Expand);
264  setOperationAction(ISD::SDIV, VT, Expand);
265  setOperationAction(ISD::UREM, VT, Expand);
266  setOperationAction(ISD::SREM, VT, Expand);
267 
268  if (!HasMVEFP) {
269  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
270  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
271  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
272  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
273  }
274  }
275 
276  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
277  for (auto VT : FloatTypes) {
278  addRegisterClass(VT, &ARM::QPRRegClass);
279  if (!HasMVEFP)
280  setAllExpand(VT);
281 
282  // These are legal or custom whether or not we have MVE.fp.
283  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
284  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
285  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
286  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
287  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
288  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
289  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
290 
291  if (HasMVEFP) {
292  setOperationAction(ISD::FMINNUM, VT, Legal);
293  setOperationAction(ISD::FMAXNUM, VT, Legal);
294  setOperationAction(ISD::FROUND, VT, Legal);
295 
296  // No native support for these.
297  setOperationAction(ISD::FDIV, VT, Expand);
298  setOperationAction(ISD::FREM, VT, Expand);
299  setOperationAction(ISD::FSQRT, VT, Expand);
300  setOperationAction(ISD::FSIN, VT, Expand);
301  setOperationAction(ISD::FCOS, VT, Expand);
302  setOperationAction(ISD::FPOW, VT, Expand);
303  setOperationAction(ISD::FLOG, VT, Expand);
304  setOperationAction(ISD::FLOG2, VT, Expand);
305  setOperationAction(ISD::FLOG10, VT, Expand);
306  setOperationAction(ISD::FEXP, VT, Expand);
307  setOperationAction(ISD::FEXP2, VT, Expand);
308  setOperationAction(ISD::FNEARBYINT, VT, Expand);
309  }
310  }
311 
312  // We 'support' these types up to bitcast/load/store level, regardless of
313  // MVE integer-only / float support. Only doing FP data processing on the FP
314  // vector types is inhibited at integer-only level.
315  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
316  for (auto VT : LongTypes) {
317  addRegisterClass(VT, &ARM::QPRRegClass);
318  setAllExpand(VT);
319  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
320  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
321  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
322  }
323  // We can do bitwise operations on v2i64 vectors
324  setOperationAction(ISD::AND, MVT::v2i64, Legal);
325  setOperationAction(ISD::OR, MVT::v2i64, Legal);
326  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
327 
328  // It is legal to extload from v8i8 to v8i16, and from v4i8 or v4i16 to v4i32.
329  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
330  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
331  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
332 
333  // Some truncating stores are legal too.
334  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
335  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
336  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
337 }
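// Illustrative IR for the extload legality above (a sketch added for
// exposition, not from the upstream file):
//   %v = load <8 x i8>, <8 x i8>* %p, align 8
//   %w = zext <8 x i8> %v to <8 x i16>
// With MVE this can typically be selected as a single widening vector load
// (e.g. vldrb.u16) rather than a load followed by a separate extend.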
338 
339 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
340  const ARMSubtarget &STI)
341  : TargetLowering(TM), Subtarget(&STI) {
342  RegInfo = Subtarget->getRegisterInfo();
343  Itins = Subtarget->getInstrItineraryData();
344 
347 
348  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
349  !Subtarget->isTargetWatchOS()) {
350  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
351  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
352  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
353  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
354  : CallingConv::ARM_AAPCS);
355  }
356 
357  if (Subtarget->isTargetMachO()) {
358  // Uses VFP for Thumb libfuncs if available.
359  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
360  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
361  static const struct {
362  const RTLIB::Libcall Op;
363  const char * const Name;
364  const ISD::CondCode Cond;
365  } LibraryCalls[] = {
366  // Single-precision floating-point arithmetic.
367  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
368  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
369  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
370  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
371 
372  // Double-precision floating-point arithmetic.
373  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
374  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
375  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
376  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
377 
378  // Single-precision comparisons.
379  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
380  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
381  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
382  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
383  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
384  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
385  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
386  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
387 
388  // Double-precision comparisons.
389  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
390  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
391  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
392  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
393  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
394  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
395  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
396  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
397 
398  // Floating-point to integer conversions.
399  // i64 conversions are done via library routines even when generating VFP
400  // instructions, so use the same ones.
401  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
402  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
403  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
404  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
405 
406  // Conversions between floating types.
407  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
408  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
409 
410  // Integer to floating-point conversions.
411  // i64 conversions are done via library routines even when generating VFP
412  // instructions, so use the same ones.
413  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
414  // e.g., __floatunsidf vs. __floatunssidfvfp.
415  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
416  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
417  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
418  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
419  };
420 
421  for (const auto &LC : LibraryCalls) {
422  setLibcallName(LC.Op, LC.Name);
423  if (LC.Cond != ISD::SETCC_INVALID)
424  setCmpLibcallCC(LC.Op, LC.Cond);
425  }
426  }
427  }
428 
429  // These libcalls are not available in 32-bit.
430  setLibcallName(RTLIB::SHL_I128, nullptr);
431  setLibcallName(RTLIB::SRL_I128, nullptr);
432  setLibcallName(RTLIB::SRA_I128, nullptr);
433 
434  // RTLIB
435  if (Subtarget->isAAPCS_ABI() &&
436  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
437  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
438  static const struct {
439  const RTLIB::Libcall Op;
440  const char * const Name;
441  const CallingConv::ID CC;
442  const ISD::CondCode Cond;
443  } LibraryCalls[] = {
444  // Double-precision floating-point arithmetic helper functions
445  // RTABI chapter 4.1.2, Table 2
446  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
447  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
448  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
449  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
450 
451  // Double-precision floating-point comparison helper functions
452  // RTABI chapter 4.1.2, Table 3
453  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
454  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
455  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
456  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
457  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
458  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
459  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
460  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
461 
462  // Single-precision floating-point arithmetic helper functions
463  // RTABI chapter 4.1.2, Table 4
464  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
465  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
466  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
467  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
468 
469  // Single-precision floating-point comparison helper functions
470  // RTABI chapter 4.1.2, Table 5
471  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
472  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
473  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
474  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
475  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
476  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
477  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
478  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
479 
480  // Floating-point to integer conversions.
481  // RTABI chapter 4.1.2, Table 6
482  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
483  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
484  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
485  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
486  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
487  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
488  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
489  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
490 
491  // Conversions between floating types.
492  // RTABI chapter 4.1.2, Table 7
493  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
494  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
495  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
496 
497  // Integer to floating-point conversions.
498  // RTABI chapter 4.1.2, Table 8
499  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
500  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
501  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
502  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
503  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
504  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
505  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
506  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
507 
508  // Long long helper functions
509  // RTABI chapter 4.2, Table 9
510  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
511  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
512  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
513  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
514 
515  // Integer division functions
516  // RTABI chapter 4.3.1
517  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
518  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
519  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
520  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
521  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
522  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
523  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
524  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
525  };
526 
527  for (const auto &LC : LibraryCalls) {
528  setLibcallName(LC.Op, LC.Name);
529  setLibcallCallingConv(LC.Op, LC.CC);
530  if (LC.Cond != ISD::SETCC_INVALID)
531  setCmpLibcallCC(LC.Op, LC.Cond);
532  }
533 
534  // EABI dependent RTLIB
535  if (TM.Options.EABIVersion == EABI::EABI4 ||
536  TM.Options.EABIVersion == EABI::EABI5) {
537  static const struct {
538  const RTLIB::Libcall Op;
539  const char *const Name;
540  const CallingConv::ID CC;
541  const ISD::CondCode Cond;
542  } MemOpsLibraryCalls[] = {
543  // Memory operations
544  // RTABI chapter 4.3.4
545  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
546  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
547  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
548  };
549 
550  for (const auto &LC : MemOpsLibraryCalls) {
551  setLibcallName(LC.Op, LC.Name);
552  setLibcallCallingConv(LC.Op, LC.CC);
553  if (LC.Cond != ISD::SETCC_INVALID)
554  setCmpLibcallCC(LC.Op, LC.Cond);
555  }
556  }
557  }
558 
559  if (Subtarget->isTargetWindows()) {
560  static const struct {
561  const RTLIB::Libcall Op;
562  const char * const Name;
563  const CallingConv::ID CC;
564  } LibraryCalls[] = {
565  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
566  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
567  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
568  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
569  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
570  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
571  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
572  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
573  };
574 
575  for (const auto &LC : LibraryCalls) {
576  setLibcallName(LC.Op, LC.Name);
577  setLibcallCallingConv(LC.Op, LC.CC);
578  }
579  }
580 
581  // Use divmod compiler-rt calls for iOS 5.0 and later.
582  if (Subtarget->isTargetMachO() &&
583  !(Subtarget->isTargetIOS() &&
584  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
585  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
586  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
587  }
588 
589  // The half <-> float conversion functions are always soft-float on
590  // non-watchos platforms, but are needed for some targets which use a
591  // hard-float calling convention by default.
592  if (!Subtarget->isTargetWatchABI()) {
593  if (Subtarget->isAAPCS_ABI()) {
594  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
595  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
596  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
597  } else {
598  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
599  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
600  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
601  }
602  }
603 
604  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
605  // a __gnu_ prefix (which is the default).
606  if (Subtarget->isTargetAEABI()) {
607  static const struct {
608  const RTLIB::Libcall Op;
609  const char * const Name;
610  const CallingConv::ID CC;
611  } LibraryCalls[] = {
612  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
613  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
614  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
615  };
616 
617  for (const auto &LC : LibraryCalls) {
618  setLibcallName(LC.Op, LC.Name);
619  setLibcallCallingConv(LC.Op, LC.CC);
620  }
621  }
622 
623  if (Subtarget->isThumb1Only())
624  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
625  else
626  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
627 
628  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
629  Subtarget->hasFPRegs()) {
630  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
631  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
632  if (!Subtarget->hasVFP2Base())
633  setAllExpand(MVT::f32);
634  if (!Subtarget->hasFP64())
635  setAllExpand(MVT::f64);
636  }
637 
638  if (Subtarget->hasFullFP16()) {
639  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
643 
646  }
647 
648  for (MVT VT : MVT::vector_valuetypes()) {
649  for (MVT InnerVT : MVT::vector_valuetypes()) {
650  setTruncStoreAction(VT, InnerVT, Expand);
651  addAllExtLoads(VT, InnerVT, Expand);
652  }
653 
658 
660  }
661 
664 
667 
668  if (Subtarget->hasMVEIntegerOps())
669  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
670 
671  // Combine low-overhead loop intrinsics so that we can lower i1 types.
672  if (Subtarget->hasLOB())
674 
675  if (Subtarget->hasNEON()) {
676  addDRTypeForNEON(MVT::v2f32);
677  addDRTypeForNEON(MVT::v8i8);
678  addDRTypeForNEON(MVT::v4i16);
679  addDRTypeForNEON(MVT::v2i32);
680  addDRTypeForNEON(MVT::v1i64);
681 
682  addQRTypeForNEON(MVT::v4f32);
683  addQRTypeForNEON(MVT::v2f64);
684  addQRTypeForNEON(MVT::v16i8);
685  addQRTypeForNEON(MVT::v8i16);
686  addQRTypeForNEON(MVT::v4i32);
687  addQRTypeForNEON(MVT::v2i64);
688 
689  if (Subtarget->hasFullFP16()) {
690  addQRTypeForNEON(MVT::v8f16);
691  addDRTypeForNEON(MVT::v4f16);
692  }
693  }
694 
695  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
696  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
697  // none of Neon, MVE or VFP supports any arithmetic operations on it.
701  // FIXME: Code duplication: FDIV and FREM are expanded always, see
702  // ARMTargetLowering::addTypeForNEON method for details.
705  // FIXME: Create unittest.
706  // In another words, find a way when "copysign" appears in DAG with vector
707  // operands.
709  // FIXME: Code duplication: SETCC has custom operation action, see
710  // ARMTargetLowering::addTypeForNEON method for details.
712  // FIXME: Create unittest for FNEG and for FABS.
724  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
731  }
732 
733  if (Subtarget->hasNEON()) {
734  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
735  // supported for v4f32.
750 
751  // Mark v2f32 intrinsics.
766 
767  // Neon does not support some operations on v1i64 and v2i64 types.
769  // Custom handling for some quad-vector types to detect VMULL.
773  // Custom handling for some vector types to avoid expensive expansions
778  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
779  // a destination type that is wider than the source, and nor does
780  // it have a FP_TO_[SU]INT instruction with a narrower destination than
781  // source.
790 
793 
794  // NEON does not have single instruction CTPOP for vectors with element
795  // types wider than 8-bits. However, custom lowering can leverage the
796  // v8i8/v16i8 vcnt instruction.
803 
806 
807  // NEON does not have single instruction CTTZ for vectors.
812 
817 
822 
827 
828  // NEON only has FMA instructions as of VFP4.
829  if (!Subtarget->hasVFP4Base()) {
832  }
833 
848 
849  // It is legal to extload from v4i8 to v4i16 or v4i32.
851  MVT::v2i32}) {
852  for (MVT VT : MVT::integer_vector_valuetypes()) {
856  }
857  }
858  }
859 
860  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
864  }
865 
866  if (!Subtarget->hasFP64()) {
867  // When targeting a floating-point unit with only single-precision
868  // operations, f64 is legal for the few double-precision instructions which
869  // are present. However, no double-precision operations other than moves,
870  // loads and stores are provided by the hardware.
902  }
903 
904  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()){
907  }
908 
909  if (!Subtarget->hasFP16())
911 
912  if (!Subtarget->hasFP64())
914 
916 
917  // ARM does not have floating-point extending loads.
918  for (MVT VT : MVT::fp_valuetypes()) {
921  }
922 
923  // ... or truncating stores
927 
928  // ARM does not have i1 sign extending load.
929  for (MVT VT : MVT::integer_valuetypes())
931 
932  // ARM supports all 4 flavors of integer indexed load / store.
933  if (!Subtarget->isThumb1Only()) {
934  for (unsigned im = (unsigned)ISD::PRE_INC;
944  }
945  } else {
946  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
949  }
950 
955 
958 
959  // i64 operation support.
962  if (Subtarget->isThumb1Only()) {
965  }
966  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
967  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
969 
976 
977  // MVE lowers 64 bit shifts to lsll and lsrl
978  // assuming that ISD::SRL and SRA of i64 are already marked custom
979  if (Subtarget->hasMVEIntegerOps())
981 
982  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
983  if (Subtarget->isThumb1Only()) {
987  }
988 
989  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
991 
992  // ARM does not have ROTL.
994  for (MVT VT : MVT::vector_valuetypes()) {
997  }
1000  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1003  }
1004 
1005  // @llvm.readcyclecounter requires the Performance Monitors extension.
1006  // Default to the 0 expansion on unsupported platforms.
1007  // FIXME: Technically there are older ARM CPUs that have
1008  // implementation-specific ways of obtaining this information.
1009  if (Subtarget->hasPerfMon())
1011 
1012  // Only ARMv6 has BSWAP.
1013  if (!Subtarget->hasV6Ops())
1015 
1016  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1017  : Subtarget->hasDivideInARMMode();
1018  if (!hasDivide) {
1019  // These are expanded into libcalls if the cpu doesn't have HW divider.
1022  }
1023 
1024  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1027 
1030  }
1031 
1034 
1035  // Register based DivRem for AEABI (RTABI 4.2)
1036  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1037  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1038  Subtarget->isTargetWindows()) {
1041  HasStandaloneRem = false;
1042 
1043  if (Subtarget->isTargetWindows()) {
1044  const struct {
1045  const RTLIB::Libcall Op;
1046  const char * const Name;
1047  const CallingConv::ID CC;
1048  } LibraryCalls[] = {
1049  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1050  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1051  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1052  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1053 
1054  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1055  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1056  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1057  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1058  };
1059 
1060  for (const auto &LC : LibraryCalls) {
1061  setLibcallName(LC.Op, LC.Name);
1062  setLibcallCallingConv(LC.Op, LC.CC);
1063  }
1064  } else {
1065  const struct {
1066  const RTLIB::Libcall Op;
1067  const char * const Name;
1068  const CallingConv::ID CC;
1069  } LibraryCalls[] = {
1070  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1071  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1072  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1073  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1074 
1075  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1076  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1077  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1078  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1079  };
1080 
1081  for (const auto &LC : LibraryCalls) {
1082  setLibcallName(LC.Op, LC.Name);
1083  setLibcallCallingConv(LC.Op, LC.CC);
1084  }
1085  }
1086 
1091  } else {
1094  }
1095 
1096  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
1097  for (auto &VT : {MVT::f32, MVT::f64})
1099 
1104 
1107 
1108  // Use the default implementation.
1115 
1116  if (Subtarget->isTargetWindows())
1118  else
1120 
1121  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1122  // the default expansion.
1123  InsertFencesForAtomic = false;
1124  if (Subtarget->hasAnyDataBarrier() &&
1125  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1126  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1127  // to ldrex/strex loops already.
1129  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1131 
1132  // On v8, we have particularly efficient implementations of atomic fences
1133  // if they can be combined with nearby atomic loads and stores.
1134  if (!Subtarget->hasAcquireRelease() ||
1135  getTargetMachine().getOptLevel() == 0) {
1136  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1137  InsertFencesForAtomic = true;
1138  }
1139  } else {
1140  // If there's anything we can use as a barrier, go through custom lowering
1141  // for ATOMIC_FENCE.
1142  // If target has DMB in thumb, Fences can be inserted.
1143  if (Subtarget->hasDataBarrier())
1144  InsertFencesForAtomic = true;
1145 
1147  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1148 
1149  // Set them all for expansion, which will force libcalls.
1162  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1163  // Unordered/Monotonic case.
1164  if (!InsertFencesForAtomic) {
1167  }
1168  }
1169 
1171 
1172  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1173  if (!Subtarget->hasV6Ops()) {
1176  }
1178 
1179  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1180  !Subtarget->isThumb1Only()) {
1181  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1182  // iff target supports vfp2.
1185  }
1186 
1187  // We want to custom lower some of our intrinsics.
1192  if (Subtarget->useSjLjEH())
1193  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1194 
1204  if (Subtarget->hasFullFP16()) {
1208  }
1209 
1211 
1214  if (Subtarget->hasFullFP16())
1219 
1220  // We don't support sin/cos/fmod/copysign/pow
1229  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1230  !Subtarget->isThumb1Only()) {
1233  }
1236 
1237  if (!Subtarget->hasVFP4Base()) {
1240  }
1241 
1242  // Various VFP goodness
1243  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1244  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1245  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1248  }
1249 
1250  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1251  if (!Subtarget->hasFP16()) {
1254  }
1255  }
1256 
1257  // Use __sincos_stret if available.
1258  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1259  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1262  }
1263 
1264  // FP-ARMv8 implements a lot of rounding-like FP operations.
1265  if (Subtarget->hasFPARMv8Base()) {
1274  if (Subtarget->hasNEON()) {
1279  }
1280 
1281  if (Subtarget->hasFP64()) {
1290  }
1291  }
1292 
1293  // FP16 often need to be promoted to call lib functions
1294  if (Subtarget->hasFullFP16()) {
1307 
1309  }
1310 
1311  if (Subtarget->hasNEON()) {
1312  // vmin and vmax aren't available in a scalar form, so we use
1313  // a NEON instruction with an undef lane instead.
1322 
1323  if (Subtarget->hasFullFP16()) {
1328 
1333  }
1334  }
1335 
1336  // We have target-specific dag combine patterns for the following nodes:
1337  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1344 
1345  if (Subtarget->hasV6Ops())
1347  if (Subtarget->isThumb1Only())
1349 
1351 
1352  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1353  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1355  else
1357 
1358  //// temporary - rewrite interface to use type
1359  MaxStoresPerMemset = 8;
1361  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1363  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1365 
1366  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1367  // are at least 4 bytes aligned.
1369 
1370  // Prefer likely predicted branches to selects on out-of-order cores.
1371  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1372 
1374 
1375  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1376 
1377  if (Subtarget->isThumb() || Subtarget->isThumb2())
1379 }
1380 
1382  return Subtarget->useSoftFloat();
1383 }
1384 
1385 // FIXME: It might make sense to define the representative register class as the
1386 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1387 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1388 // SPR's representative would be DPR_VFP2. This should work well if register
1389 // pressure tracking were modified such that a register use would increment the
1390 // pressure of the register class's representative and all of its super
1391 // classes' representatives transitively. We have not implemented this because
1392 // of the difficulty prior to coalescing of modeling operand register classes
1393 // due to the common occurrence of cross class copies and subregister insertions
1394 // and extractions.
1395 std::pair<const TargetRegisterClass *, uint8_t>
1396 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1397  MVT VT) const {
1398  const TargetRegisterClass *RRC = nullptr;
1399  uint8_t Cost = 1;
1400  switch (VT.SimpleTy) {
1401  default:
1402  return TargetLowering::findRepresentativeClass(TRI, VT);
1403  // Use DPR as the representative register class for all floating-point
1404  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1405  // the cost is 1 for both f32 and f64.
1406  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1407  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1408  RRC = &ARM::DPRRegClass;
1409  // When NEON is used for SP, only half of the register file is available
1410  // because operations that define both SP and DP results will be constrained
1411  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1412  // coalescing by double-counting the SP regs. See the FIXME above.
1413  if (Subtarget->useNEONForSinglePrecisionFP())
1414  Cost = 2;
1415  break;
1416  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1417  case MVT::v4f32: case MVT::v2f64:
1418  RRC = &ARM::DPRRegClass;
1419  Cost = 2;
1420  break;
1421  case MVT::v4i64:
1422  RRC = &ARM::DPRRegClass;
1423  Cost = 4;
1424  break;
1425  case MVT::v8i64:
1426  RRC = &ARM::DPRRegClass;
1427  Cost = 8;
1428  break;
1429  }
1430  return std::make_pair(RRC, Cost);
1431 }
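// For example (a sketch added for exposition): findRepresentativeClass(TRI,
// MVT::v4f32) returns {&ARM::DPRRegClass, 2}, since a Q register occupies two
// D registers; with NEON used for single-precision FP, f32 also reports a cost
// of 2 to model the D0-D15 constraint described in the FIXME above.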
1432 
1433 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1434  switch ((ARMISD::NodeType)Opcode) {
1435  case ARMISD::FIRST_NUMBER: break;
1436  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1437  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1438  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1439  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1440  case ARMISD::CALL: return "ARMISD::CALL";
1441  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1442  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1443  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1444  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1445  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1446  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1447  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1448  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1449  case ARMISD::CMP: return "ARMISD::CMP";
1450  case ARMISD::CMN: return "ARMISD::CMN";
1451  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1452  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1453  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1454  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1455  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1456 
1457  case ARMISD::CMOV: return "ARMISD::CMOV";
1458  case ARMISD::SUBS: return "ARMISD::SUBS";
1459 
1460  case ARMISD::SSAT: return "ARMISD::SSAT";
1461  case ARMISD::USAT: return "ARMISD::USAT";
1462 
1463  case ARMISD::ASRL: return "ARMISD::ASRL";
1464  case ARMISD::LSRL: return "ARMISD::LSRL";
1465  case ARMISD::LSLL: return "ARMISD::LSLL";
1466 
1467  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1468  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1469  case ARMISD::RRX: return "ARMISD::RRX";
1470 
1471  case ARMISD::ADDC: return "ARMISD::ADDC";
1472  case ARMISD::ADDE: return "ARMISD::ADDE";
1473  case ARMISD::SUBC: return "ARMISD::SUBC";
1474  case ARMISD::SUBE: return "ARMISD::SUBE";
1475 
1476  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1477  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1478  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1479  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1480  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1481 
1482  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1483  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1484  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1485 
1486  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1487 
1488  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1489 
1490  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1491 
1492  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1493 
1494  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1495 
1496  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1497  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1498 
1499  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1500  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1501  case ARMISD::VCGE: return "ARMISD::VCGE";
1502  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1503  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1504  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1505  case ARMISD::VCGT: return "ARMISD::VCGT";
1506  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1507  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1508  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1509  case ARMISD::VTST: return "ARMISD::VTST";
1510 
1511  case ARMISD::VSHLs: return "ARMISD::VSHLs";
1512  case ARMISD::VSHLu: return "ARMISD::VSHLu";
1513  case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1514  case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1515  case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1516  case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1517  case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1518  case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1519  case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1520  case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1521  case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1522  case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1523  case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1524  case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1525  case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1526  case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1527  case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1528  case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1529  case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1530  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1531  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1532  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1533  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1534  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1535  case ARMISD::VDUP: return "ARMISD::VDUP";
1536  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1537  case ARMISD::VEXT: return "ARMISD::VEXT";
1538  case ARMISD::VREV64: return "ARMISD::VREV64";
1539  case ARMISD::VREV32: return "ARMISD::VREV32";
1540  case ARMISD::VREV16: return "ARMISD::VREV16";
1541  case ARMISD::VZIP: return "ARMISD::VZIP";
1542  case ARMISD::VUZP: return "ARMISD::VUZP";
1543  case ARMISD::VTRN: return "ARMISD::VTRN";
1544  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1545  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1546  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1547  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1548  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1549  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1550  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1551  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1552  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1553  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1554  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1555  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1556  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1557  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1558  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1559  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1560  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1561  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1562  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1563  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1564  case ARMISD::BFI: return "ARMISD::BFI";
1565  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1566  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1567  case ARMISD::VBSL: return "ARMISD::VBSL";
1568  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1569  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1570  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1571  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1572  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1573  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1574  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1575  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1576  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1577  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1578  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1579  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1580  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1581  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1582  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1583  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1584  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1585  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1586  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1587  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1588  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1589  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1590  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1591  case ARMISD::WLS: return "ARMISD::WLS";
1592  }
1593  return nullptr;
1594 }
1595 
1596 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1597  EVT VT) const {
1598  if (!VT.isVector())
1599  return getPointerTy(DL);
1600  return VT.changeVectorElementTypeToInteger();
1601 }
1602 
1603 /// getRegClassFor - Return the register class that should be used for the
1604 /// specified value type.
1605 const TargetRegisterClass *
1606 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1607  (void)isDivergent;
1608  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1609  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1610  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1611  // MVE Q registers.
1612  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1613  if (VT == MVT::v4i64)
1614  return &ARM::QQPRRegClass;
1615  if (VT == MVT::v8i64)
1616  return &ARM::QQQQPRRegClass;
1617  }
1618  return TargetLowering::getRegClassFor(VT);
1619 }
1620 
1621 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1622 // source/dest is aligned and the copy size is large enough. We therefore want
1623 // to align such objects passed to memory intrinsics.
1624 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1625  unsigned &PrefAlign) const {
1626  if (!isa<MemIntrinsic>(CI))
1627  return false;
1628  MinSize = 8;
1629  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1630  // cycle faster than 4-byte aligned LDM.
1631  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1632  return true;
1633 }
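// Illustrative effect (a sketch added for exposition, not from the upstream
// file): given
//   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 64, i1 false)
// objects passed to the intrinsic that are at least MinSize (8) bytes may have
// their alignment raised to PrefAlign (8 on v6+ non-M-class cores, otherwise
// 4), letting the expansion use LDM/STM.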
1634 
1635 // Create a fast isel object.
1636 FastISel *
1637 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1638  const TargetLibraryInfo *libInfo) const {
1639  return ARM::createFastISel(funcInfo, libInfo);
1640 }
1641 
1642 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1643  unsigned NumVals = N->getNumValues();
1644  if (!NumVals)
1645  return Sched::RegPressure;
1646 
1647  for (unsigned i = 0; i != NumVals; ++i) {
1648  EVT VT = N->getValueType(i);
1649  if (VT == MVT::Glue || VT == MVT::Other)
1650  continue;
1651  if (VT.isFloatingPoint() || VT.isVector())
1652  return Sched::ILP;
1653  }
1654 
1655  if (!N->isMachineOpcode())
1656  return Sched::RegPressure;
1657 
1658  // Loads are scheduled for latency even if the instruction itinerary
1659  // is not available.
1660  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1661  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1662 
1663  if (MCID.getNumDefs() == 0)
1664  return Sched::RegPressure;
1665  if (!Itins->isEmpty() &&
1666  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1667  return Sched::ILP;
1668 
1669  return Sched::RegPressure;
1670 }
1671 
1672 //===----------------------------------------------------------------------===//
1673 // Lowering Code
1674 //===----------------------------------------------------------------------===//
1675 
1676 static bool isSRL16(const SDValue &Op) {
1677  if (Op.getOpcode() != ISD::SRL)
1678  return false;
1679  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1680  return Const->getZExtValue() == 16;
1681  return false;
1682 }
1683 
1684 static bool isSRA16(const SDValue &Op) {
1685  if (Op.getOpcode() != ISD::SRA)
1686  return false;
1687  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1688  return Const->getZExtValue() == 16;
1689  return false;
1690 }
1691 
1692 static bool isSHL16(const SDValue &Op) {
1693  if (Op.getOpcode() != ISD::SHL)
1694  return false;
1695  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1696  return Const->getZExtValue() == 16;
1697  return false;
1698 }
1699 
1700 // Check for a signed 16-bit value. We special case SRA because it makes it
1701 // simpler when also looking for SRAs that aren't sign-extending a
1702 // smaller value. Without the check, we'd need to take extra care with
1703 // checking order for some operations.
1704 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1705  if (isSRA16(Op))
1706  return isSHL16(Op.getOperand(0));
1707  return DAG.ComputeNumSignBits(Op) == 17;
1708 }
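// For example (added for exposition): (sra (shl x, 16), 16) is accepted via
// the isSRA16/isSHL16 path above, and any other node for which
// ComputeNumSignBits reports 17 sign bits (an i32 value that fits in signed
// 16 bits) also qualifies.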
1709 
1710 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1711 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1712  switch (CC) {
1713  default: llvm_unreachable("Unknown condition code!");
1714  case ISD::SETNE: return ARMCC::NE;
1715  case ISD::SETEQ: return ARMCC::EQ;
1716  case ISD::SETGT: return ARMCC::GT;
1717  case ISD::SETGE: return ARMCC::GE;
1718  case ISD::SETLT: return ARMCC::LT;
1719  case ISD::SETLE: return ARMCC::LE;
1720  case ISD::SETUGT: return ARMCC::HI;
1721  case ISD::SETUGE: return ARMCC::HS;
1722  case ISD::SETULT: return ARMCC::LO;
1723  case ISD::SETULE: return ARMCC::LS;
1724  }
1725 }
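// For example, IntCCToARMCC(ISD::SETULT) yields ARMCC::LO and
// IntCCToARMCC(ISD::SETGE) yields ARMCC::GE, matching the condition fields
// encoded into CMP/Bcc sequences. (Comment added for exposition.)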
1726 
1727 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1728 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1729  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1730  CondCode2 = ARMCC::AL;
1731  InvalidOnQNaN = true;
1732  switch (CC) {
1733  default: llvm_unreachable("Unknown FP condition!");
1734  case ISD::SETEQ:
1735  case ISD::SETOEQ:
1736  CondCode = ARMCC::EQ;
1737  InvalidOnQNaN = false;
1738  break;
1739  case ISD::SETGT:
1740  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1741  case ISD::SETGE:
1742  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1743  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1744  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1745  case ISD::SETONE:
1746  CondCode = ARMCC::MI;
1747  CondCode2 = ARMCC::GT;
1748  InvalidOnQNaN = false;
1749  break;
1750  case ISD::SETO: CondCode = ARMCC::VC; break;
1751  case ISD::SETUO: CondCode = ARMCC::VS; break;
1752  case ISD::SETUEQ:
1753  CondCode = ARMCC::EQ;
1754  CondCode2 = ARMCC::VS;
1755  InvalidOnQNaN = false;
1756  break;
1757  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1758  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1759  case ISD::SETLT:
1760  case ISD::SETULT: CondCode = ARMCC::LT; break;
1761  case ISD::SETLE:
1762  case ISD::SETULE: CondCode = ARMCC::LE; break;
1763  case ISD::SETNE:
1764  case ISD::SETUNE:
1765  CondCode = ARMCC::NE;
1766  InvalidOnQNaN = false;
1767  break;
1768  }
1769 }
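// For example (added for exposition): SETONE has no single ARM condition, so
// the table above returns CondCode = MI with CondCode2 = GT; callers emit two
// predicated checks and treat the comparison as true if either one holds.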
1770 
1771 //===----------------------------------------------------------------------===//
1772 // Calling Convention Implementation
1773 //===----------------------------------------------------------------------===//
1774 
1775 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1776 /// account presence of floating point hardware and calling convention
1777 /// limitations, such as support for variadic functions.
1778 CallingConv::ID
1779 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1780  bool isVarArg) const {
1781  switch (CC) {
1782  default:
1783  report_fatal_error("Unsupported calling convention");
1785  case CallingConv::ARM_APCS:
1786  case CallingConv::GHC:
1787  return CC;
1791  case CallingConv::Swift:
1793  case CallingConv::C:
1794  if (!Subtarget->isAAPCS_ABI())
1795  return CallingConv::ARM_APCS;
1796  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
1798  !isVarArg)
1800  else
1801  return CallingConv::ARM_AAPCS;
1802  case CallingConv::Fast:
1804  if (!Subtarget->isAAPCS_ABI()) {
1805  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
1806  return CallingConv::Fast;
1807  return CallingConv::ARM_APCS;
1808  } else if (Subtarget->hasVFP2Base() &&
1809  !Subtarget->isThumb1Only() && !isVarArg)
1811  else
1812  return CallingConv::ARM_AAPCS;
1813  }
1814 }
1815 
1816 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1817  bool isVarArg) const {
1818  return CCAssignFnForNode(CC, false, isVarArg);
1819 }
1820 
1821 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1822  bool isVarArg) const {
1823  return CCAssignFnForNode(CC, true, isVarArg);
1824 }
1825 
1826 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1827 /// CallingConvention.
1828 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1829  bool Return,
1830  bool isVarArg) const {
1831  switch (getEffectiveCallingConv(CC, isVarArg)) {
1832  default:
1833  report_fatal_error("Unsupported calling convention");
1834  case CallingConv::ARM_APCS:
1835  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1836  case CallingConv::ARM_AAPCS:
1837  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1838  case CallingConv::ARM_AAPCS_VFP:
1839  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1840  case CallingConv::Fast:
1841  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1842  case CallingConv::GHC:
1843  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1844  case CallingConv::PreserveMost:
1845  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1846  }
1847 }
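// For example (a sketch added for exposition): on a hard-float AAPCS target
// with VFP2, getEffectiveCallingConv(CallingConv::C, /*isVarArg=*/false)
// resolves to ARM_AAPCS_VFP, so CCAssignFnForCall returns CC_ARM_AAPCS_VFP and
// CCAssignFnForReturn returns RetCC_ARM_AAPCS_VFP for such calls.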
1848 
1849 /// LowerCallResult - Lower the result values of a call into the
1850 /// appropriate copies out of appropriate physical registers.
1851 SDValue ARMTargetLowering::LowerCallResult(
1852  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1853  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1854  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1855  SDValue ThisVal) const {
1856  // Assign locations to each value returned by this call.
1857  SmallVector<CCValAssign, 16> RVLocs;
1858  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1859  *DAG.getContext());
1860  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1861 
1862  // Copy all of the result registers out of their specified physreg.
1863  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1864  CCValAssign VA = RVLocs[i];
1865 
1866  // Pass 'this' value directly from the argument to return value, to avoid
1867  // reg unit interference
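  // (Flags.isReturned() on the first argument guarantees the callee returns
  // its 'this' argument unchanged in r0, so the incoming value can be reused.)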
1868  if (i == 0 && isThisReturn) {
1869  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1870  "unexpected return calling convention register assignment");
1871  InVals.push_back(ThisVal);
1872  continue;
1873  }
1874 
1875  SDValue Val;
1876  if (VA.needsCustom()) {
1877  // Handle f64 or half of a v2f64.
1878  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1879  InFlag);
1880  Chain = Lo.getValue(1);
1881  InFlag = Lo.getValue(2);
1882  VA = RVLocs[++i]; // skip ahead to next loc
1883  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1884  InFlag);
1885  Chain = Hi.getValue(1);
1886  InFlag = Hi.getValue(2);
1887  if (!Subtarget->isLittle())
1888  std::swap (Lo, Hi);
1889  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1890 
1891  if (VA.getLocVT() == MVT::v2f64) {
1892  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1893  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1894  DAG.getConstant(0, dl, MVT::i32));
1895 
1896  VA = RVLocs[++i]; // skip ahead to next loc
1897  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1898  Chain = Lo.getValue(1);
1899  InFlag = Lo.getValue(2);
1900  VA = RVLocs[++i]; // skip ahead to next loc
1901  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1902  Chain = Hi.getValue(1);
1903  InFlag = Hi.getValue(2);
1904  if (!Subtarget->isLittle())
1905  std::swap (Lo, Hi);
1906  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1907  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1908  DAG.getConstant(1, dl, MVT::i32));
1909  }
1910  } else {
1911  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1912  InFlag);
1913  Chain = Val.getValue(1);
1914  InFlag = Val.getValue(2);
1915  }
1916 
1917  switch (VA.getLocInfo()) {
1918  default: llvm_unreachable("Unknown loc info!");
1919  case CCValAssign::Full: break;
1920  case CCValAssign::BCvt:
1921  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1922  break;
1923  }
1924 
1925  InVals.push_back(Val);
1926  }
1927 
1928  return Chain;
1929 }
1930 
1931 /// LowerMemOpCallTo - Store the argument to the stack.
1932 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1933  SDValue Arg, const SDLoc &dl,
1934  SelectionDAG &DAG,
1935  const CCValAssign &VA,
1936  ISD::ArgFlagsTy Flags) const {
1937  unsigned LocMemOffset = VA.getLocMemOffset();
1938  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1939  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1940  StackPtr, PtrOff);
1941  return DAG.getStore(
1942  Chain, dl, Arg, PtrOff,
1943  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1944 }
1945 
1946 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1947  SDValue Chain, SDValue &Arg,
1948  RegsToPassVector &RegsToPass,
1949  CCValAssign &VA, CCValAssign &NextVA,
1950  SDValue &StackPtr,
1951  SmallVectorImpl<SDValue> &MemOpChains,
1952  ISD::ArgFlagsTy Flags) const {
1953  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1954  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1955  unsigned id = Subtarget->isLittle() ? 0 : 1;
1956  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1957 
1958  if (NextVA.isRegLoc())
1959  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1960  else {
1961  assert(NextVA.isMemLoc());
1962  if (!StackPtr.getNode())
1963  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1964  getPointerTy(DAG.getDataLayout()));
1965 
1966  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1967  dl, DAG, NextVA,
1968  Flags));
1969  }
1970 }
1971 
1972 /// LowerCall - Lowering a call into a callseq_start <-
1973 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1974 /// nodes.
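// The lowering below proceeds in stages: analyze the operands with the chosen
// calling convention, emit copies/stores for the arguments (splitting f64 and
// v2f64 values and byval aggregates as needed), resolve the callee to a direct
// or indirect target, then emit TC_RETURN for tail calls, or the call node
// followed by callseq_end and the copies of the result values.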
1975 SDValue
1976 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1977  SmallVectorImpl<SDValue> &InVals) const {
1978  SelectionDAG &DAG = CLI.DAG;
1979  SDLoc &dl = CLI.DL;
1980  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1981  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1982  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1983  SDValue Chain = CLI.Chain;
1984  SDValue Callee = CLI.Callee;
1985  bool &isTailCall = CLI.IsTailCall;
1986  CallingConv::ID CallConv = CLI.CallConv;
1987  bool doesNotRet = CLI.DoesNotReturn;
1988  bool isVarArg = CLI.IsVarArg;
1989 
1990  MachineFunction &MF = DAG.getMachineFunction();
1991  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1992  bool isThisReturn = false;
1993  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
1994  bool PreferIndirect = false;
1995 
1996  // Disable tail calls if they're not supported.
1997  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1998  isTailCall = false;
1999 
2000  if (isa<GlobalAddressSDNode>(Callee)) {
2001  // If we're optimizing for minimum size and the function is called three or
2002  // more times in this block, we can improve codesize by calling indirectly
2003  // as BLXr has a 16-bit encoding.
2004  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2005  if (CLI.CS) {
2006  auto *BB = CLI.CS.getParent();
2007  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2008  count_if(GV->users(), [&BB](const User *U) {
2009  return isa<Instruction>(U) &&
2010  cast<Instruction>(U)->getParent() == BB;
2011  }) > 2;
2012  }
2013  }
2014  if (isTailCall) {
2015  // Check if it's really possible to do a tail call.
2016  isTailCall = IsEligibleForTailCallOptimization(
2017  Callee, CallConv, isVarArg, isStructRet,
2018  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2019  PreferIndirect);
2020  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
2021  report_fatal_error("failed to perform tail call elimination on a call "
2022  "site marked musttail");
2023  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2024  // detected sibcalls.
2025  if (isTailCall)
2026  ++NumTailCalls;
2027  }
2028 
2029  // Analyze operands of the call, assigning locations to each operand.
2030  SmallVector<CCValAssign, 16> ArgLocs;
2031  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2032  *DAG.getContext());
2033  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2034 
2035  // Get a count of how many bytes are to be pushed on the stack.
2036  unsigned NumBytes = CCInfo.getNextStackOffset();
2037 
2038  if (isTailCall) {
2039  // For tail calls, memory operands are available in our caller's stack.
2040  NumBytes = 0;
2041  } else {
2042  // Adjust the stack pointer for the new arguments...
2043  // These operations are automatically eliminated by the prolog/epilog pass
2044  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2045  }
2046 
2047  SDValue StackPtr =
2048  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2049 
2050  RegsToPassVector RegsToPass;
2051  SmallVector<SDValue, 8> MemOpChains;
2052 
2053  // Walk the register/memloc assignments, inserting copies/loads. In the case
2054  // of tail call optimization, arguments are handled later.
2055  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2056  i != e;
2057  ++i, ++realArgIdx) {
2058  CCValAssign &VA = ArgLocs[i];
2059  SDValue Arg = OutVals[realArgIdx];
2060  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2061  bool isByVal = Flags.isByVal();
2062 
2063  // Promote the value if needed.
2064  switch (VA.getLocInfo()) {
2065  default: llvm_unreachable("Unknown loc info!");
2066  case CCValAssign::Full: break;
2067  case CCValAssign::SExt:
2068  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2069  break;
2070  case CCValAssign::ZExt:
2071  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2072  break;
2073  case CCValAssign::AExt:
2074  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2075  break;
2076  case CCValAssign::BCvt:
2077  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2078  break;
2079  }
2080 
2081  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2082  if (VA.needsCustom()) {
2083  if (VA.getLocVT() == MVT::v2f64) {
2084  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2085  DAG.getConstant(0, dl, MVT::i32));
2086  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2087  DAG.getConstant(1, dl, MVT::i32));
2088 
2089  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
2090  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2091 
2092  VA = ArgLocs[++i]; // skip ahead to next loc
2093  if (VA.isRegLoc()) {
2094  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
2095  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2096  } else {
2097  assert(VA.isMemLoc());
2098 
2099  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
2100  dl, DAG, VA, Flags));
2101  }
2102  } else {
2103  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2104  StackPtr, MemOpChains, Flags);
2105  }
2106  } else if (VA.isRegLoc()) {
2107  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2108  Outs[0].VT == MVT::i32) {
2109  assert(VA.getLocVT() == MVT::i32 &&
2110  "unexpected calling convention register assignment");
2111  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2112  "unexpected use of 'returned'");
2113  isThisReturn = true;
2114  }
2115  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2116  } else if (isByVal) {
2117  assert(VA.isMemLoc());
2118  unsigned offset = 0;
2119 
2120  // True if this byval aggregate will be split between registers
2121  // and memory.
2122  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2123  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2124 
2125  if (CurByValIdx < ByValArgsCount) {
2126 
2127  unsigned RegBegin, RegEnd;
2128  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2129 
2130  EVT PtrVT =
2131  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2132  unsigned int i, j;
2133  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2134  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2135  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2136  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
2137  MachinePointerInfo(),
2138  DAG.InferPtrAlignment(AddArg));
2139  MemOpChains.push_back(Load.getValue(1));
2140  RegsToPass.push_back(std::make_pair(j, Load));
2141  }
2142 
2143  // If the parameter size exceeds the register area, the "offset" value
2144  // helps us to calculate the stack slot for the remaining part properly.
2145  offset = RegEnd - RegBegin;
2146 
2147  CCInfo.nextInRegsParam();
2148  }
2149 
2150  if (Flags.getByValSize() > 4*offset) {
2151  auto PtrVT = getPointerTy(DAG.getDataLayout());
2152  unsigned LocMemOffset = VA.getLocMemOffset();
2153  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2154  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2155  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2156  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2157  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2158  MVT::i32);
2159  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
2160  MVT::i32);
2161 
2162  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2163  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2164  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2165  Ops));
2166  }
2167  } else if (!isTailCall) {
2168  assert(VA.isMemLoc());
2169 
2170  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2171  dl, DAG, VA, Flags));
2172  }
2173  }
2174 
2175  if (!MemOpChains.empty())
2176  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2177 
2178  // Build a sequence of copy-to-reg nodes chained together with token chain
2179  // and flag operands which copy the outgoing args into the appropriate regs.
2180  SDValue InFlag;
2181  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2182  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2183  RegsToPass[i].second, InFlag);
2184  InFlag = Chain.getValue(1);
2185  }
2186 
2187  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2188  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2189  // node so that legalize doesn't hack it.
2190  bool isDirect = false;
2191 
2192  const TargetMachine &TM = getTargetMachine();
2193  const Module *Mod = MF.getFunction().getParent();
2194  const GlobalValue *GV = nullptr;
2195  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2196  GV = G->getGlobal();
2197  bool isStub =
2198  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2199 
2200  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2201  bool isLocalARMFunc = false;
2202  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2203  auto PtrVt = getPointerTy(DAG.getDataLayout());
2204 
2205  if (Subtarget->genLongCalls()) {
2206  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2207  "long-calls codegen is not position independent!");
2208  // Handle a global address or an external symbol. If it's not one of
2209  // those, the target's already in a register, so we don't need to do
2210  // anything extra.
2211  if (isa<GlobalAddressSDNode>(Callee)) {
2212  // Create a constant pool entry for the callee address
2213  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2214  ARMConstantPoolValue *CPV =
2215  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2216 
2217  // Get the address of the callee into a register
2218  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2219  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2220  Callee = DAG.getLoad(
2221  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2222  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2223  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2224  const char *Sym = S->getSymbol();
2225 
2226  // Create a constant pool entry for the callee address
2227  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2228  ARMConstantPoolValue *CPV =
2229  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2230  ARMPCLabelIndex, 0);
2231  // Get the address of the callee into a register
2232  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2233  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2234  Callee = DAG.getLoad(
2235  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2236  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2237  }
2238  } else if (isa<GlobalAddressSDNode>(Callee)) {
2239  if (!PreferIndirect) {
2240  isDirect = true;
2241  bool isDef = GV->isStrongDefinitionForLinker();
2242 
2243  // ARM call to a local ARM function is predicable.
2244  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2245  // tBX takes a register source operand.
2246  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2247  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2248  Callee = DAG.getNode(
2249  ARMISD::WrapperPIC, dl, PtrVt,
2250  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2251  Callee = DAG.getLoad(
2252  PtrVt, dl, DAG.getEntryNode(), Callee,
2253  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2254  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2255  MachineMemOperand::MOInvariant);
2256  } else if (Subtarget->isTargetCOFF()) {
2257  assert(Subtarget->isTargetWindows() &&
2258  "Windows is the only supported COFF target");
2259  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2260  ? ARMII::MO_DLLIMPORT
2261  : ARMII::MO_NO_FLAG;
2262  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2263  TargetFlags);
2264  if (GV->hasDLLImportStorageClass())
2265  Callee =
2266  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2267  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2268  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2269  } else {
2270  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2271  }
2272  }
2273  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2274  isDirect = true;
2275  // tBX takes a register source operand.
2276  const char *Sym = S->getSymbol();
2277  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2278  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2279  ARMConstantPoolValue *CPV =
2280  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2281  ARMPCLabelIndex, 4);
2282  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2283  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2284  Callee = DAG.getLoad(
2285  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2286  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2287  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2288  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2289  } else {
2290  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2291  }
2292  }
2293 
2294  // FIXME: handle tail calls differently.
2295  unsigned CallOpc;
2296  if (Subtarget->isThumb()) {
2297  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2298  CallOpc = ARMISD::CALL_NOLINK;
2299  else
2300  CallOpc = ARMISD::CALL;
2301  } else {
2302  if (!isDirect && !Subtarget->hasV5TOps())
2303  CallOpc = ARMISD::CALL_NOLINK;
2304  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2305  // Emit regular call when code size is the priority
2306  !Subtarget->hasMinSize())
2307  // "mov lr, pc; b _foo" to avoid confusing the RSP
2308  CallOpc = ARMISD::CALL_NOLINK;
2309  else
2310  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2311  }
2312 
2313  std::vector<SDValue> Ops;
2314  Ops.push_back(Chain);
2315  Ops.push_back(Callee);
2316 
2317  // Add argument registers to the end of the list so that they are known live
2318  // into the call.
2319  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2320  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2321  RegsToPass[i].second.getValueType()));
2322 
2323  // Add a register mask operand representing the call-preserved registers.
2324  if (!isTailCall) {
2325  const uint32_t *Mask;
2326  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2327  if (isThisReturn) {
2328  // For 'this' returns, use the R0-preserving mask if applicable
2329  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2330  if (!Mask) {
2331  // Set isThisReturn to false if the calling convention is not one that
2332  // allows 'returned' to be modeled in this way, so LowerCallResult does
2333  // not try to pass 'this' straight through
2334  isThisReturn = false;
2335  Mask = ARI->getCallPreservedMask(MF, CallConv);
2336  }
2337  } else
2338  Mask = ARI->getCallPreservedMask(MF, CallConv);
2339 
2340  assert(Mask && "Missing call preserved mask for calling convention");
2341  Ops.push_back(DAG.getRegisterMask(Mask));
2342  }
2343 
2344  if (InFlag.getNode())
2345  Ops.push_back(InFlag);
2346 
2347  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2348  if (isTailCall) {
2349  MF.getFrameInfo().setHasTailCall();
2350  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2351  }
2352 
2353  // Returns a chain and a flag for retval copy to use.
2354  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2355  InFlag = Chain.getValue(1);
2356 
2357  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2358  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2359  if (!Ins.empty())
2360  InFlag = Chain.getValue(1);
2361 
2362  // Handle result values, copying them out of physregs into vregs that we
2363  // return.
2364  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2365  InVals, isThisReturn,
2366  isThisReturn ? OutVals[0] : SDValue());
2367 }
2368 
2369 /// HandleByVal - Every parameter *after* a byval parameter is passed
2370 /// on the stack. Remember the next parameter register to allocate,
2371 /// and then confiscate the rest of the parameter registers to ensure
2372 /// this.
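// For example, a 10-byte byval with 8-byte alignment arriving when r1 is the
// next free register wastes r1 (to satisfy the alignment), starts the byval in
// r2, fills r2-r3, and leaves the remaining 2 bytes to be passed on the stack.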
2373 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2374  unsigned Align) const {
2375  // Byval (as with any stack) slots are always at least 4 byte aligned.
2376  Align = std::max(Align, 4U);
2377 
2378  unsigned Reg = State->AllocateReg(GPRArgRegs);
2379  if (!Reg)
2380  return;
2381 
2382  unsigned AlignInRegs = Align / 4;
2383  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2384  for (unsigned i = 0; i < Waste; ++i)
2385  Reg = State->AllocateReg(GPRArgRegs);
2386 
2387  if (!Reg)
2388  return;
2389 
2390  unsigned Excess = 4 * (ARM::R4 - Reg);
2391 
2392  // Special case when NSAA != SP and the parameter size is greater than the
2393  // size of all remaining GPR regs. In that case we can't split the parameter;
2394  // we must send it to the stack. We also must set NCRN to R4, so all
2395  // remaining registers are wasted.
2396  const unsigned NSAAOffset = State->getNextStackOffset();
2397  if (NSAAOffset != 0 && Size > Excess) {
2398  while (State->AllocateReg(GPRArgRegs))
2399  ;
2400  return;
2401  }
2402 
2403  // The first register for the byval parameter is the first register that
2404  // wasn't allocated before this method call, so it would be "reg".
2405  // If the parameter is small enough to be saved in the range [reg, r4), then
2406  // the end (first after last) register would be reg + param-size-in-regs;
2407  // else the parameter would be split between registers and stack, and the
2408  // end register would be r4 in this case.
2409  unsigned ByValRegBegin = Reg;
2410  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2411  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2412  // Note, the first register is already allocated at the beginning of the
2413  // function; allocate the remaining number of registers we need.
2414  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2415  State->AllocateReg(GPRArgRegs);
2416  // A byval parameter that is split between registers and memory needs its
2417  // size truncated here.
2418  // In the case where the entire structure fits in registers, we set the
2419  // size in memory to zero.
2420  Size = std::max<int>(Size - Excess, 0);
2421 }
2422 
2423 /// MatchingStackOffset - Return true if the given stack call argument is
2424 /// already available in the same position (relatively) of the caller's
2425 /// incoming argument stack.
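// This allows a sibling call to reuse the caller's incoming stack slot for the
// argument instead of copying it, which matters because the tail-call path
// above does not adjust the stack (NumBytes is forced to 0).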
2426 static
2427 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2428  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2429  const TargetInstrInfo *TII) {
2430  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2431  int FI = std::numeric_limits<int>::max();
2432  if (Arg.getOpcode() == ISD::CopyFromReg) {
2433  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2434  if (!Register::isVirtualRegister(VR))
2435  return false;
2436  MachineInstr *Def = MRI->getVRegDef(VR);
2437  if (!Def)
2438  return false;
2439  if (!Flags.isByVal()) {
2440  if (!TII->isLoadFromStackSlot(*Def, FI))
2441  return false;
2442  } else {
2443  return false;
2444  }
2445  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2446  if (Flags.isByVal())
2447  // ByVal argument is passed in as a pointer but it's now being
2448  // dereferenced. e.g.
2449  // define @foo(%struct.X* %A) {
2450  // tail call @bar(%struct.X* byval %A)
2451  // }
2452  return false;
2453  SDValue Ptr = Ld->getBasePtr();
2454  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2455  if (!FINode)
2456  return false;
2457  FI = FINode->getIndex();
2458  } else
2459  return false;
2460 
2461  assert(FI != std::numeric_limits<int>::max());
2462  if (!MFI.isFixedObjectIndex(FI))
2463  return false;
2464  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2465 }
2466 
2467 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2468 /// for tail call optimization. Targets which want to do tail call
2469 /// optimization should implement this function.
2470 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2471  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2472  bool isCalleeStructRet, bool isCallerStructRet,
2473  const SmallVectorImpl<ISD::OutputArg> &Outs,
2474  const SmallVectorImpl<SDValue> &OutVals,
2475  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG,
2476  const bool isIndirect) const {
2477  MachineFunction &MF = DAG.getMachineFunction();
2478  const Function &CallerF = MF.getFunction();
2479  CallingConv::ID CallerCC = CallerF.getCallingConv();
2480 
2481  assert(Subtarget->supportsTailCall());
2482 
2483  // Indirect tail calls cannot be optimized for Thumb1 if the args
2484  // to the call take up r0-r3. The reason is that there are no legal registers
2485  // left to hold the pointer to the function to be called.
2486  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2487  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2488  return false;
2489 
2490  // Look for obvious safe cases to perform tail call optimization that do not
2491  // require ABI changes. This is what gcc calls sibcall.
2492 
2493  // Exception-handling functions need a special set of instructions to indicate
2494  // a return to the hardware. Tail-calling another function would probably
2495  // break this.
2496  if (CallerF.hasFnAttribute("interrupt"))
2497  return false;
2498 
2499  // Also avoid sibcall optimization if either caller or callee uses struct
2500  // return semantics.
2501  if (isCalleeStructRet || isCallerStructRet)
2502  return false;
2503 
2504  // Externally-defined functions with weak linkage should not be
2505  // tail-called on ARM when the OS does not support dynamic
2506  // pre-emption of symbols, as the AAELF spec requires normal calls
2507  // to undefined weak functions to be replaced with a NOP or jump to the
2508  // next instruction. The behaviour of branch instructions in this
2509  // situation (as used for tail calls) is implementation-defined, so we
2510  // cannot rely on the linker replacing the tail call with a return.
2511  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2512  const GlobalValue *GV = G->getGlobal();
2513  const Triple &TT = getTargetMachine().getTargetTriple();
2514  if (GV->hasExternalWeakLinkage() &&
2515  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2516  return false;
2517  }
2518 
2519  // Check that the call results are passed in the same way.
2520  LLVMContext &C = *DAG.getContext();
2521  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2522  CCAssignFnForReturn(CalleeCC, isVarArg),
2523  CCAssignFnForReturn(CallerCC, isVarArg)))
2524  return false;
2525  // The callee has to preserve all registers the caller needs to preserve.
2526  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2527  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2528  if (CalleeCC != CallerCC) {
2529  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2530  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2531  return false;
2532  }
2533 
2534  // If Caller's vararg or byval argument has been split between registers and
2535  // stack, do not perform tail call, since part of the argument is in caller's
2536  // local frame.
2537  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2538  if (AFI_Caller->getArgRegsSaveSize())
2539  return false;
2540 
2541  // If the callee takes no arguments then go on to check the results of the
2542  // call.
2543  if (!Outs.empty()) {
2544  // Check if stack adjustment is needed. For now, do not do this if any
2545  // argument is passed on the stack.
2546  SmallVector<CCValAssign, 16> ArgLocs;
2547  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2548  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2549  if (CCInfo.getNextStackOffset()) {
2550  // Check if the arguments are already laid out in the right way as
2551  // the caller's fixed stack objects.
2552  MachineFrameInfo &MFI = MF.getFrameInfo();
2553  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2554  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2555  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2556  i != e;
2557  ++i, ++realArgIdx) {
2558  CCValAssign &VA = ArgLocs[i];
2559  EVT RegVT = VA.getLocVT();
2560  SDValue Arg = OutVals[realArgIdx];
2561  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2562  if (VA.getLocInfo() == CCValAssign::Indirect)
2563  return false;
2564  if (VA.needsCustom()) {
2565  // f64 and vector types are split into multiple registers or
2566  // register/stack-slot combinations. The types will not match
2567  // the registers; give up on memory f64 refs until we figure
2568  // out what to do about this.
2569  if (!VA.isRegLoc())
2570  return false;
2571  if (!ArgLocs[++i].isRegLoc())
2572  return false;
2573  if (RegVT == MVT::v2f64) {
2574  if (!ArgLocs[++i].isRegLoc())
2575  return false;
2576  if (!ArgLocs[++i].isRegLoc())
2577  return false;
2578  }
2579  } else if (!VA.isRegLoc()) {
2580  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2581  MFI, MRI, TII))
2582  return false;
2583  }
2584  }
2585  }
2586 
2587  const MachineRegisterInfo &MRI = MF.getRegInfo();
2588  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2589  return false;
2590  }
2591 
2592  return true;
2593 }
2594 
2595 bool
2596 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2597  MachineFunction &MF, bool isVarArg,
2598  const SmallVectorImpl<ISD::OutputArg> &Outs,
2599  LLVMContext &Context) const {
2600  SmallVector<CCValAssign, 16> RVLocs;
2601  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2602  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2603 }
2604 
2605 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2606  const SDLoc &DL, SelectionDAG &DAG) {
2607  const MachineFunction &MF = DAG.getMachineFunction();
2608  const Function &F = MF.getFunction();
2609 
2610  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2611 
2612  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2613  // version of the "preferred return address". These offsets affect the return
2614  // instruction if this is a return from PL1 without hypervisor extensions.
2615  // IRQ/FIQ: +4 "subs pc, lr, #4"
2616  // SWI: 0 "subs pc, lr, #0"
2617  // ABORT: +4 "subs pc, lr, #4"
2618  // UNDEF: +4/+2 "subs pc, lr, #0"
2619  // UNDEF varies depending on where the exception came from ARM or Thumb
2620  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2621 
2622  int64_t LROffset;
2623  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2624  IntKind == "ABORT")
2625  LROffset = 4;
2626  else if (IntKind == "SWI" || IntKind == "UNDEF")
2627  LROffset = 0;
2628  else
2629  report_fatal_error("Unsupported interrupt attribute. If present, value "
2630  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2631 
2632  RetOps.insert(RetOps.begin() + 1,
2633  DAG.getConstant(LROffset, DL, MVT::i32, false));
2634 
2635  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2636 }
2637 
2638 SDValue
2639 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2640  bool isVarArg,
2641  const SmallVectorImpl<ISD::OutputArg> &Outs,
2642  const SmallVectorImpl<SDValue> &OutVals,
2643  const SDLoc &dl, SelectionDAG &DAG) const {
2644  // CCValAssign - represent the assignment of the return value to a location.
2645  SmallVector<CCValAssign, 16> RVLocs;
2646 
2647  // CCState - Info about the registers and stack slots.
2648  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2649  *DAG.getContext());
2650 
2651  // Analyze outgoing return values.
2652  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2653 
2654  SDValue Flag;
2655  SmallVector<SDValue, 4> RetOps;
2656  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2657  bool isLittleEndian = Subtarget->isLittle();
2658 
2659  MachineFunction &MF = DAG.getMachineFunction();
2660  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2661  AFI->setReturnRegsCount(RVLocs.size());
2662 
2663  // Copy the result values into the output registers.
2664  for (unsigned i = 0, realRVLocIdx = 0;
2665  i != RVLocs.size();
2666  ++i, ++realRVLocIdx) {
2667  CCValAssign &VA = RVLocs[i];
2668  assert(VA.isRegLoc() && "Can only return in registers!");
2669 
2670  SDValue Arg = OutVals[realRVLocIdx];
2671  bool ReturnF16 = false;
2672 
2673  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2674  // Half-precision return values can be returned like this:
2675  //
2676  // t11 f16 = fadd ...
2677  // t12: i16 = bitcast t11
2678  // t13: i32 = zero_extend t12
2679  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2680  //
2681  // to avoid code generation for bitcasts, we simply set Arg to the node
2682  // that produces the f16 value, t11 in this case.
2683  //
2684  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2685  SDValue ZE = Arg.getOperand(0);
2686  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2687  SDValue BC = ZE.getOperand(0);
2688  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2689  Arg = BC.getOperand(0);
2690  ReturnF16 = true;
2691  }
2692  }
2693  }
2694  }
2695 
2696  switch (VA.getLocInfo()) {
2697  default: llvm_unreachable("Unknown loc info!");
2698  case CCValAssign::Full: break;
2699  case CCValAssign::BCvt:
2700  if (!ReturnF16)
2701  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2702  break;
2703  }
2704 
2705  if (VA.needsCustom()) {
2706  if (VA.getLocVT() == MVT::v2f64) {
2707  // Extract the first half and return it in two registers.
2708  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2709  DAG.getConstant(0, dl, MVT::i32));
2710  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2711  DAG.getVTList(MVT::i32, MVT::i32), Half);
2712 
2713  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2714  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2715  Flag);
2716  Flag = Chain.getValue(1);
2717  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2718  VA = RVLocs[++i]; // skip ahead to next loc
2719  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2720  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2721  Flag);
2722  Flag = Chain.getValue(1);
2723  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2724  VA = RVLocs[++i]; // skip ahead to next loc
2725 
2726  // Extract the 2nd half and fall through to handle it as an f64 value.
2727  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2728  DAG.getConstant(1, dl, MVT::i32));
2729  }
2730  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2731  // available.
2732  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2733  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2734  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2735  fmrrd.getValue(isLittleEndian ? 0 : 1),
2736  Flag);
2737  Flag = Chain.getValue(1);
2738  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2739  VA = RVLocs[++i]; // skip ahead to next loc
2740  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2741  fmrrd.getValue(isLittleEndian ? 1 : 0),
2742  Flag);
2743  } else
2744  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2745 
2746  // Guarantee that all emitted copies are
2747  // stuck together, avoiding something bad.
2748  Flag = Chain.getValue(1);
2749  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2750  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2751  }
2752  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2753  const MCPhysReg *I =
2754  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2755  if (I) {
2756  for (; *I; ++I) {
2757  if (ARM::GPRRegClass.contains(*I))
2758  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2759  else if (ARM::DPRRegClass.contains(*I))
2760  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2761  else
2762  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2763  }
2764  }
2765 
2766  // Update chain and glue.
2767  RetOps[0] = Chain;
2768  if (Flag.getNode())
2769  RetOps.push_back(Flag);
2770 
2771  // CPUs which aren't M-class use a special sequence to return from
2772  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2773  // though we use "subs pc, lr, #N").
2774  //
2775  // M-class CPUs actually use a normal return sequence with a special
2776  // (hardware-provided) value in LR, so the normal code path works.
2777  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2778  !Subtarget->isMClass()) {
2779  if (Subtarget->isThumb1Only())
2780  report_fatal_error("interrupt attribute is not supported in Thumb1");
2781  return LowerInterruptReturn(RetOps, dl, DAG);
2782  }
2783 
2784  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2785 }
2786 
2787 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2788  if (N->getNumValues() != 1)
2789  return false;
2790  if (!N->hasNUsesOfValue(1, 0))
2791  return false;
2792 
2793  SDValue TCChain = Chain;
2794  SDNode *Copy = *N->use_begin();
2795  if (Copy->getOpcode() == ISD::CopyToReg) {
2796  // If the copy has a glue operand, we conservatively assume it isn't safe to
2797  // perform a tail call.
2798  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2799  return false;
2800  TCChain = Copy->getOperand(0);
2801  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2802  SDNode *VMov = Copy;
2803  // f64 returned in a pair of GPRs.
2804  SmallPtrSet<SDNode*, 2> Copies;
2805  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2806  UI != UE; ++UI) {
2807  if (UI->getOpcode() != ISD::CopyToReg)
2808  return false;
2809  Copies.insert(*UI);
2810  }
2811  if (Copies.size() > 2)
2812  return false;
2813 
2814  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2815  UI != UE; ++UI) {
2816  SDValue UseChain = UI->getOperand(0);
2817  if (Copies.count(UseChain.getNode()))
2818  // Second CopyToReg
2819  Copy = *UI;
2820  else {
2821  // We are at the top of this chain.
2822  // If the copy has a glue operand, we conservatively assume it
2823  // isn't safe to perform a tail call.
2824  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2825  return false;
2826  // First CopyToReg
2827  TCChain = UseChain;
2828  }
2829  }
2830  } else if (Copy->getOpcode() == ISD::BITCAST) {
2831  // f32 returned in a single GPR.
2832  if (!Copy->hasOneUse())
2833  return false;
2834  Copy = *Copy->use_begin();
2835  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2836  return false;
2837  // If the copy has a glue operand, we conservatively assume it isn't safe to
2838  // perform a tail call.
2839  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2840  return false;
2841  TCChain = Copy->getOperand(0);
2842  } else {
2843  return false;
2844  }
2845 
2846  bool HasRet = false;
2847  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2848  UI != UE; ++UI) {
2849  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2850  UI->getOpcode() != ARMISD::INTRET_FLAG)
2851  return false;
2852  HasRet = true;
2853  }
2854 
2855  if (!HasRet)
2856  return false;
2857 
2858  Chain = TCChain;
2859  return true;
2860 }
2861 
2862 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2863  if (!Subtarget->supportsTailCall())
2864  return false;
2865 
2866  auto Attr =
2867  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2868  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2869  return false;
2870 
2871  return true;
2872 }
2873 
2874 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2875 // values first, and pass the low and high parts through.
2876 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2877  SDLoc DL(Op);
2878  SDValue WriteValue = Op->getOperand(2);
2879 
2880  // This function is only supposed to be called for i64 type argument.
2881  assert(WriteValue.getValueType() == MVT::i64
2882  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2883 
2884  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2885  DAG.getConstant(0, DL, MVT::i32));
2886  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2887  DAG.getConstant(1, DL, MVT::i32));
2888  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2889  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2890 }
2891 
2892 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2893 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2894 // one of the above mentioned nodes. It has to be wrapped because otherwise
2895 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2896 // be used to form addressing mode. These wrapped nodes will be selected
2897 // into MOVi.
2898 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2899  SelectionDAG &DAG) const {
2900  EVT PtrVT = Op.getValueType();
2901  // FIXME there is no actual debug info here
2902  SDLoc dl(Op);
2903  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2904  SDValue Res;
2905 
2906  // When generating execute-only code Constant Pools must be promoted to the
2907  // global data section. It's a bit ugly that we can't share them across basic
2908  // blocks, but this way we guarantee that execute-only behaves correct with
2909  // position-independent addressing modes.
2910  if (Subtarget->genExecuteOnly()) {
2911  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2912  auto T = const_cast<Type*>(CP->getType());
2913  auto C = const_cast<Constant*>(CP->getConstVal());
2914  auto M = const_cast<Module*>(DAG.getMachineFunction().
2915  getFunction().getParent());
2916  auto GV = new GlobalVariable(
2917  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
2918  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2919  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2920  Twine(AFI->createPICLabelUId())
2921  );
2922  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2923  dl, PtrVT);
2924  return LowerGlobalAddress(GA, DAG);
2925  }
2926 
2927  if (CP->isMachineConstantPoolEntry())
2928  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2929  CP->getAlignment());
2930  else
2931  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2932  CP->getAlignment());
2933  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2934 }
2935 
2936 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2937  return MachineJumpTableInfo::EK_Inline;
2938 }
2939 
2940 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2941  SelectionDAG &DAG) const {
2942  MachineFunction &MF = DAG.getMachineFunction();
2943  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2944  unsigned ARMPCLabelIndex = 0;
2945  SDLoc DL(Op);
2946  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2947  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2948  SDValue CPAddr;
2949  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2950  if (!IsPositionIndependent) {
2951  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2952  } else {
2953  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2954  ARMPCLabelIndex = AFI->createPICLabelUId();
2955  ARMConstantPoolValue *CPV =
2956  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2957  ARMCP::CPBlockAddress, PCAdj);
2958  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2959  }
2960  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2961  SDValue Result = DAG.getLoad(
2962  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2963  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2964  if (!IsPositionIndependent)
2965  return Result;
2966  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2967  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2968 }
2969 
2970 /// Convert a TLS address reference into the correct sequence of loads
2971 /// and calls to compute the variable's address for Darwin, and return an
2972 /// SDValue containing the final node.
2973 
2974 /// Darwin only has one TLS scheme which must be capable of dealing with the
2975 /// fully general situation, in the worst case. This means:
2976 /// + "extern __thread" declaration.
2977 /// + Defined in a possibly unknown dynamic library.
2978 ///
2979 /// The general system is that each __thread variable has a [3 x i32] descriptor
2980 /// which contains information used by the runtime to calculate the address. The
2981 /// only part of this the compiler needs to know about is the first word, which
2982 /// contains a function pointer that must be called with the address of the
2983 /// entire descriptor in "r0".
2984 ///
2985 /// Since this descriptor may be in a different unit, in general access must
2986 /// proceed along the usual ARM rules. A common sequence to produce is:
2987 ///
2988 /// movw rT1, :lower16:_var$non_lazy_ptr
2989 /// movt rT1, :upper16:_var$non_lazy_ptr
2990 /// ldr r0, [rT1]
2991 /// ldr rT2, [r0]
2992 /// blx rT2
2993 /// [...address now in r0...]
2994 SDValue
2995 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2996  SelectionDAG &DAG) const {
2997  assert(Subtarget->isTargetDarwin() &&
2998  "This function expects a Darwin target");
2999  SDLoc DL(Op);
3000 
3001  // First step is to get the address of the actual global symbol. This is where
3002  // the TLS descriptor lives.
3003  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3004 
3005  // The first entry in the descriptor is a function pointer that we must call
3006  // to obtain the address of the variable.
3007  SDValue Chain = DAG.getEntryNode();
3008  SDValue FuncTLVGet = DAG.getLoad(
3009  MVT::i32, DL, Chain, DescAddr,
3010  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3011  /* Alignment = */ 4,
3012  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3013  MachineMemOperand::MOInvariant);
3014  Chain = FuncTLVGet.getValue(1);
3015 
3016  MachineFunction &F = DAG.getMachineFunction();
3017  MachineFrameInfo &MFI = F.getFrameInfo();
3018  MFI.setAdjustsStack(true);
3019 
3020  // TLS calls preserve all registers except those that absolutely must be
3021  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3022  // silly).
3023  auto TRI =
3024  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3025  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3026  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3027 
3028  // Finally, we can make the call. This is just a degenerate version of a
3029  // normal ARM call node: r0 takes the address of the descriptor, and
3030  // returns the address of the variable in this thread.
3031  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3032  Chain =
3033  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3034  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3035  DAG.getRegisterMask(Mask), Chain.getValue(1));
3036  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3037 }
3038 
3039 SDValue
3040 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3041  SelectionDAG &DAG) const {
3042  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3043 
3044  SDValue Chain = DAG.getEntryNode();
3045  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3046  SDLoc DL(Op);
3047 
3048  // Load the current TEB (thread environment block)
3049  SDValue Ops[] = {Chain,
3050  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3051  DAG.getConstant(15, DL, MVT::i32),
3052  DAG.getConstant(0, DL, MVT::i32),
3053  DAG.getConstant(13, DL, MVT::i32),
3054  DAG.getConstant(0, DL, MVT::i32),
3055  DAG.getConstant(2, DL, MVT::i32)};
3056  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3057  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3058 
3059  SDValue TEB = CurrentTEB.getValue(0);
3060  Chain = CurrentTEB.getValue(1);
3061 
3062  // Load the ThreadLocalStoragePointer from the TEB
3063  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3064  SDValue TLSArray =
3065  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3066  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3067 
3068  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3069  // offset into the TLSArray.
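  // In other words: TlsBase = *(*(TEB + 0x2c) + _tls_index * 4), and the final
  // address is TlsBase plus the variable's SECREL offset loaded below.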
3070 
3071  // Load the TLS index from the C runtime
3072  SDValue TLSIndex =
3073  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3074  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3075  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3076 
3077  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3078  DAG.getConstant(2, DL, MVT::i32));
3079  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3080  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3081  MachinePointerInfo());
3082 
3083  // Get the offset of the start of the .tls section (section base)
3084  const auto *GA = cast<GlobalAddressSDNode>(Op);
3085  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3086  SDValue Offset = DAG.getLoad(
3087  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3088  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
3089  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3090 
3091  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3092 }
3093 
3094 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3095 SDValue
3096 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3097  SelectionDAG &DAG) const {
3098  SDLoc dl(GA);
3099  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3100  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3101  MachineFunction &MF = DAG.getMachineFunction();
3102  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3103  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3104  ARMConstantPoolValue *CPV =
3105  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3106  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3107  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3108  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3109  Argument = DAG.getLoad(
3110  PtrVT, dl, DAG.getEntryNode(), Argument,
3111  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3112  SDValue Chain = Argument.getValue(1);
3113 
3114  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3115  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3116 
3117  // call __tls_get_addr.
3118  ArgListTy Args;
3119  ArgListEntry Entry;
3120  Entry.Node = Argument;
3121  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3122  Args.push_back(Entry);
3123 
3124  // FIXME: is there useful debug info available here?
3125  TargetLowering::CallLoweringInfo CLI(DAG);
3126  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3127  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3128  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3129 
3130  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3131  return CallResult.first;
3132 }
3133 
3134 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3135 // "local exec" model.
3136 SDValue
3137 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3138  SelectionDAG &DAG,
3139  TLSModel::Model model) const {
3140  const GlobalValue *GV = GA->getGlobal();
3141  SDLoc dl(GA);
3142  SDValue Offset;
3143  SDValue Chain = DAG.getEntryNode();
3144  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3145  // Get the Thread Pointer
3146  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3147 
3148  if (model == TLSModel::InitialExec) {
3149  MachineFunction &MF = DAG.getMachineFunction();
3150  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3151  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3152  // Initial exec model.
3153  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3154  ARMConstantPoolValue *CPV =
3155  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3156  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3157  true);
3158  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3159  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3160  Offset = DAG.getLoad(
3161  PtrVT, dl, Chain, Offset,
3162  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3163  Chain = Offset.getValue(1);
3164 
3165  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3166  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3167 
3168  Offset = DAG.getLoad(
3169  PtrVT, dl, Chain, Offset,
3170  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3171  } else {
3172  // local exec model
3173  assert(model == TLSModel::LocalExec);
3174  ARMConstantPoolValue *CPV =
3175  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3176  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3177  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3178  Offset = DAG.getLoad(
3179  PtrVT, dl, Chain, Offset,
3180  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3181  }
3182 
3183  // The address of the thread local variable is the add of the thread
3184  // pointer with the offset of the variable.
3185  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3186 }
3187 
3188 SDValue
3189 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3190  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3191  if (DAG.getTarget().useEmulatedTLS())
3192  return LowerToTLSEmulatedModel(GA, DAG);
3193 
3194  if (Subtarget->isTargetDarwin())
3195  return LowerGlobalTLSAddressDarwin(Op, DAG);
3196 
3197  if (Subtarget->isTargetWindows())
3198  return LowerGlobalTLSAddressWindows(Op, DAG);
3199 
3200  // TODO: implement the "local dynamic" model
3201  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3202  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3203 
3204  switch (model) {
3205  case TLSModel::GeneralDynamic:
3206  case TLSModel::LocalDynamic:
3207  return LowerToTLSGeneralDynamicModel(GA, DAG);
3208  case TLSModel::InitialExec:
3209  case TLSModel::LocalExec:
3210  return LowerToTLSExecModels(GA, DAG, model);
3211  }
3212  llvm_unreachable("bogus TLS model");
3213 }
3214 
3215 /// Return true if all users of V are within function F, looking through
3216 /// ConstantExprs.
3217 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3218  SmallVector<const User*,4> Worklist;
3219  for (auto *U : V->users())
3220  Worklist.push_back(U);
3221  while (!Worklist.empty()) {
3222  auto *U = Worklist.pop_back_val();
3223  if (isa<ConstantExpr>(U)) {
3224  for (auto *UU : U->users())
3225  Worklist.push_back(UU);
3226  continue;
3227  }
3228 
3229  auto *I = dyn_cast<Instruction>(U);
3230  if (!I || I->getParent()->getParent() != F)
3231  return false;
3232  }
3233  return true;
3234 }
3235 
3236 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3237  const GlobalValue *GV, SelectionDAG &DAG,
3238  EVT PtrVT, const SDLoc &dl) {
3239  // If we're creating a pool entry for a constant global with unnamed address,
3240  // and the global is small enough, we can emit it inline into the constant pool
3241  // to save ourselves an indirection.
3242  //
3243  // This is a win if the constant is only used in one function (so it doesn't
3244  // need to be duplicated) or duplicating the constant wouldn't increase code
3245  // size (implying the constant is no larger than 4 bytes).
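// For example, a small local string constant that is only referenced from one
// function can be padded to a multiple of 4 bytes and emitted directly in the
// constant pool, saving the load of its address through an indirection.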
3246  const Function &F = DAG.getMachineFunction().getFunction();
3247 
3248  // We rely on this decision to inline being idempotent and unrelated to the
3249  // use-site. We know that if we inline a variable at one use site, we'll
3250  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3251  // doesn't know about this optimization, so bail out if it's enabled else
3252  // we could decide to inline here (and thus never emit the GV) but require
3253  // the GV from fast-isel generated code.
3254  if (!EnableConstpoolPromotion ||
3255  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3256  return SDValue();
3257 
3258  auto *GVar = dyn_cast<GlobalVariable>(GV);
3259  if (!GVar || !GVar->hasInitializer() ||
3260  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3261  !GVar->hasLocalLinkage())
3262  return SDValue();
3263 
3264  // If we inline a value that contains relocations, we move the relocations
3265  // from .data to .text. This is not allowed in position-independent code.
3266  auto *Init = GVar->getInitializer();
3267  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3268  Init->needsRelocation())
3269  return SDValue();
3270 
3271  // The constant islands pass can only really deal with alignment requests
3272  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3273  // any type wanting greater alignment requirements than 4 bytes. We also
3274  // can only promote constants that are multiples of 4 bytes in size or
3275  // are paddable to a multiple of 4. Currently we only try and pad constants
3276  // that are strings for simplicity.
3277  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3278  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3279  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3280  unsigned RequiredPadding = 4 - (Size % 4);
3281  bool PaddingPossible =
3282  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3283  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3284  Size == 0)
3285  return SDValue();
3286 
3287  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3288  MachineFunction &MF = DAG.getMachineFunction();
3289  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3290 
3291  // We can't bloat the constant pool too much, else the ConstantIslands pass
3292  // may fail to converge. If we haven't promoted this global yet (it may have
3293  // multiple uses), and promoting it would increase the constant pool size (Sz
3294  // > 4), ensure we have space to do so up to MaxTotal.
3295  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3296  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3297  ConstpoolPromotionMaxTotal)
3298  return SDValue();
3299 
3300  // This is only valid if all users are in a single function; we can't clone
3301  // the constant in general. The LLVM IR unnamed_addr allows merging
3302  // constants, but not cloning them.
3303  //
3304  // We could potentially allow cloning if we could prove all uses of the
3305  // constant in the current function don't care about the address, like
3306  // printf format strings. But that isn't implemented for now.
3307  if (!allUsersAreInFunction(GVar, &F))
3308  return SDValue();
3309 
3310  // We're going to inline this global. Pad it out if needed.
3311  if (RequiredPadding != 4) {
3312  StringRef S = CDAInit->getAsString();
3313 
3314  SmallVector<uint8_t, 16> V(S.size());
3315  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3316  while (RequiredPadding--)
3317  V.push_back(0);
3318  Init = ConstantDataArray::get(*DAG.getContext(), V);
3319  }
3320 
3321  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3322  SDValue CPAddr =
3323  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3324  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3325  AFI->markGlobalAsPromotedToConstantPool(GVar);
3326  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3327  PaddedSize - 4);
3328  }
3329  ++NumConstpoolPromoted;
3330  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3331 }
3332 
3333 static bool isReadOnly(const GlobalValue *GV) {
3334  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3335  if (!(GV = GA->getBaseObject()))
3336  return false;
3337  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3338  return V->isConstant();
3339  return isa<Function>(GV);
3340 }
3341 
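 // LowerGlobalAddress dispatches on the object format implied by the target
 // triple. For example (illustrative triples): armv7a-none-linux-gnueabihf
 // selects the ELF path, thumbv7-apple-ios the MachO/Darwin path, and
 // thumbv7-windows-msvc the COFF/Windows path.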
3342 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3343  SelectionDAG &DAG) const {
3344  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3345  default: llvm_unreachable("unknown object format");
3346  case Triple::COFF:
3347  return LowerGlobalAddressWindows(Op, DAG);
3348  case Triple::ELF:
3349  return LowerGlobalAddressELF(Op, DAG);
3350  case Triple::MachO:
3351  return LowerGlobalAddressDarwin(Op, DAG);
3352  }
3353 }
3354 
3355 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3356  SelectionDAG &DAG) const {
3357  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3358  SDLoc dl(Op);
3359  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3360  const TargetMachine &TM = getTargetMachine();
3361  bool IsRO = isReadOnly(GV);
3362 
3363  // Call promoteToConstantPool only if we are not generating an execute-only (XO) text section.
3364  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3365  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3366  return V;
3367 
3368  if (isPositionIndependent()) {
3369  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3370  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3371  UseGOT_PREL ? ARMII::MO_GOT : 0);
3372  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3373  if (UseGOT_PREL)
3374  Result =
3375  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3376  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3377  return Result;
3378  } else if (Subtarget->isROPI() && IsRO) {
3379  // PC-relative.
3380  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3381  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3382  return Result;
3383  } else if (Subtarget->isRWPI() && !IsRO) {
3384  // SB-relative.
3385  SDValue RelAddr;
3386  if (Subtarget->useMovt()) {
3387  ++NumMovwMovt;
3388  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3389  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3390  } else { // use literal pool for address constant
3391  ARMConstantPoolValue *CPV =
3392  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3393  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3394  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3395  RelAddr = DAG.getLoad(
3396  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3397  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3398  }
3399  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3400  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3401  return Result;
3402  }
3403 
3404  // If we have T2 ops, we can materialize the address directly via movt/movw
3405  // pair. This is always cheaper.
3406  if (Subtarget->useMovt()) {
3407  ++NumMovwMovt;
3408  // FIXME: Once remat is capable of dealing with instructions with register
3409  // operands, expand this into two nodes.
3410  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3411  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3412  } else {
3413  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3414  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3415  return DAG.getLoad(
3416  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3417  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3418  }
3419 }
3420 
3421 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3422  SelectionDAG &DAG) const {
3423  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3424  "ROPI/RWPI not currently supported for Darwin");
3425  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3426  SDLoc dl(Op);
3427  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3428 
3429  if (Subtarget->useMovt())
3430  ++NumMovwMovt;
3431 
3432  // FIXME: Once remat is capable of dealing with instructions with register
3433  // operands, expand this into multiple nodes
3434  unsigned Wrapper =
3436 
3437  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3438  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3439 
3440  if (Subtarget->isGVIndirectSymbol(GV))
3441  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3442  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3443  return Result;
3444 }
3445 
3446 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3447  SelectionDAG &DAG) const {
3448  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3449  assert(Subtarget->useMovt() &&
3450  "Windows on ARM expects to use movw/movt");
3451  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3452  "ROPI/RWPI not currently supported for Windows");
3453 
3454  const TargetMachine &TM = getTargetMachine();
3455  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3456  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3457  if (GV->hasDLLImportStorageClass())
3458  TargetFlags = ARMII::MO_DLLIMPORT;
3459  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3460  TargetFlags = ARMII::MO_COFFSTUB;
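 // Roughly speaking: a dllimport'ed global is reached through its __imp_<name>
 // pointer, and a global that may be defined in another DLL is reached through
 // a .refptr stub; in both cases the extra load emitted below dereferences that
 // pointer to obtain the real address.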
3461  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3462  SDValue Result;
3463  SDLoc DL(Op);
3464 
3465  ++NumMovwMovt;
3466 
3467  // FIXME: Once remat is capable of dealing with instructions with register
3468  // operands, expand this into two nodes.
3469  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3470  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3471  TargetFlags));
3472  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3473  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3474  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3475  return Result;
3476 }
3477 
3478 SDValue
3479 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3480  SDLoc dl(Op);
3481  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3482  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3483  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3484  Op.getOperand(1), Val);
3485 }
3486 
3487 SDValue
3488 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3489  SDLoc dl(Op);
3490  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3491  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3492 }
3493 
3494 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3495  SelectionDAG &DAG) const {
3496  SDLoc dl(Op);
3497  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3498  Op.getOperand(0));
3499 }
3500 
3501 SDValue
3502 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3503  const ARMSubtarget *Subtarget) const {
3504  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3505  SDLoc dl(Op);
3506  switch (IntNo) {
3507  default: return SDValue(); // Don't custom lower most intrinsics.
3508  case Intrinsic::thread_pointer: {
3509  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3510  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3511  }
3512  case Intrinsic::eh_sjlj_lsda: {
3513  MachineFunction &MF = DAG.getMachineFunction();
3514  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3515  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3516  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3517  SDValue CPAddr;
3518  bool IsPositionIndependent = isPositionIndependent();
3519  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
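 // The 8/4 adjustment reflects the ARM/Thumb PC bias: by the time the PIC add
 // below executes, the program counter reads as the address of that add plus 8
 // in ARM state (plus 4 in Thumb state), so the bias is folded into the
 // constant-pool entry up front.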
3520  ARMConstantPoolValue *CPV =
3521  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3522  ARMCP::CPLSDA, PCAdj);
3523  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3524  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3525  SDValue Result = DAG.getLoad(
3526  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3527  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3528 
3529  if (IsPositionIndependent) {
3530  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3531  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3532  }
3533  return Result;
3534  }
3535  case Intrinsic::arm_neon_vabs:
3536  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3537  Op.getOperand(1));
3538  case Intrinsic::arm_neon_vmulls:
3539  case Intrinsic::arm_neon_vmullu: {
3540  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3541  ? ARMISD::VMULLs : ARMISD::VMULLu;
3542  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3543  Op.getOperand(1), Op.getOperand(2));
3544  }
3545  case Intrinsic::arm_neon_vminnm:
3546  case Intrinsic::arm_neon_vmaxnm: {
3547  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3548  ? ISD::FMINNUM : ISD::FMAXNUM;
3549  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3550  Op.getOperand(1), Op.getOperand(2));
3551  }
3552  case Intrinsic::arm_neon_vminu:
3553  case Intrinsic::arm_neon_vmaxu: {
3554  if (Op.getValueType().isFloatingPoint())
3555  return SDValue();
3556  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3557  ? ISD::UMIN : ISD::UMAX;
3558  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3559  Op.getOperand(1), Op.getOperand(2));
3560  }
3561  case Intrinsic::arm_neon_vmins:
3562  case Intrinsic::arm_neon_vmaxs: {
3563  // v{min,max}s is overloaded between signed integers and floats.
3564  if (!Op.getValueType().isFloatingPoint()) {
3565  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3566  ? ISD::SMIN : ISD::SMAX;
3567  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3568  Op.getOperand(1), Op.getOperand(2));
3569  }
3570  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3571  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3572  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3573  Op.getOperand(1), Op.getOperand(2));
3574  }
3575  case Intrinsic::arm_neon_vtbl1:
3576  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3577  Op.getOperand(1), Op.getOperand(2));
3578  case Intrinsic::arm_neon_vtbl2:
3579  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3580  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3581  }
3582 }
3583 
3584 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3585  const ARMSubtarget *Subtarget) {
3586  SDLoc dl(Op);
3587  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3588  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3589  if (SSID == SyncScope::SingleThread)
3590  return Op;
3591 
3592  if (!Subtarget->hasDataBarrier()) {
3593  // Some ARMv6 CPUs can support data barriers with an mcr instruction.
3594  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3595  // here.
3596  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3597  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3598  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3599  DAG.getConstant(0, dl, MVT::i32));
3600  }
3601 
3602  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3603  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3604  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3605  if (Subtarget->isMClass()) {
3606  // Only a full system barrier exists in the M-class architectures.
3607  Domain = ARM_MB::SY;
3608  } else if (Subtarget->preferISHSTBarriers() &&
3609  Ord == AtomicOrdering::Release) {
3610  // Swift happens to implement ISHST barriers in a way that's compatible with
3611  // Release semantics but weaker than ISH so we'd be fools not to use
3612  // it. Beware: other processors probably don't!
3613  Domain = ARM_MB::ISHST;
3614  }
3615 
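 // The fence is emitted as an arm.dmb intrinsic with the domain chosen above,
 // corresponding roughly to "dmb ish" in the common case, "dmb ishst" for
 // release fences on Swift, and "dmb sy" on M-class cores.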
3616  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3617  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3618  DAG.getConstant(Domain, dl, MVT::i32));
3619 }
3620 
3621 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3622  const ARMSubtarget *Subtarget) {
3623  // ARM before v5TE and Thumb1 do not have preload instructions.
3624  if (!(Subtarget->isThumb2() ||
3625  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3626  // Just preserve the chain.
3627  return Op.getOperand(0);
3628 
3629  SDLoc dl(Op);
3630  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3631  if (!isRead &&
3632  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3633  // ARMv7 with MP extension has PLDW.
3634  return Op.getOperand(0);
3635 
3636  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3637  if (Subtarget->isThumb()) {
3638  // Invert the bits.
3639  isRead = ~isRead & 1;
3640  isData = ~isData & 1;
3641  }
3642 
3643  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3644  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3645  DAG.getConstant(isData, dl, MVT::i32));
3646 }
3647 
3648 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3649  MachineFunction &MF = DAG.getMachineFunction();
3650  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3651 
3652  // vastart just stores the address of the VarArgsFrameIndex slot into the
3653  // memory location argument.
3654  SDLoc dl(Op);
3655  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3656  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3657  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3658  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3659  MachinePointerInfo(SV));
3660 }
3661 
3662 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3663  CCValAssign &NextVA,
3664  SDValue &Root,
3665  SelectionDAG &DAG,
3666  const SDLoc &dl) const {
3667  MachineFunction &MF = DAG.getMachineFunction();
3668  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3669 
3670  const TargetRegisterClass *RC;
3671  if (AFI->isThumb1OnlyFunction())
3672  RC = &ARM::tGPRRegClass;
3673  else
3674  RC = &ARM::GPRRegClass;
3675 
3676  // Transform the arguments stored in physical registers into virtual ones.
3677  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3678  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3679 
3680  SDValue ArgValue2;
3681  if (NextVA.isMemLoc()) {
3682  MachineFrameInfo &MFI = MF.getFrameInfo();
3683  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3684 
3685  // Create load node to retrieve arguments from the stack.
3686  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3687  ArgValue2 = DAG.getLoad(
3688  MVT::i32, dl, Root, FIN,
3689  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3690  } else {
3691  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3692  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3693  }
3694  if (!Subtarget->isLittle())
3695  std::swap (ArgValue, ArgValue2);
3696  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3697 }
3698 
3699 // The remaining GPRs hold either the beginning of variable-argument
3700 // data, or the beginning of an aggregate passed by value (usually
3701 // byval). Either way, we allocate stack slots adjacent to the data
3702 // provided by our caller, and store the unallocated registers there.
3703 // If this is a variadic function, the va_list pointer will begin with
3704 // these values; otherwise, this reassembles a (byval) structure that
3705 // was split between registers and memory.
3706 // Return: the frame index that the registers were stored into.
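 // Hypothetical example: under AAPCS, a 16-byte byval aggregate whose first
 // parameter register is r2 arrives half in r2-r3 and half on the stack.
 // StoreByValRegs spills r2 and r3 into fixed stack slots placed immediately
 // below the caller-provided half, so the callee sees one contiguous 16-byte
 // object.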
3707 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3708  const SDLoc &dl, SDValue &Chain,
3709  const Value *OrigArg,
3710  unsigned InRegsParamRecordIdx,
3711  int ArgOffset, unsigned ArgSize) const {
3712  // Currently, two use-cases are possible:
3713  // Case #1. Non-var-args function, and we meet the first byval parameter.
3714  // Set up the first unallocated register as the first byval register;
3715  // eat all remaining registers
3716  // (these two actions are performed by the HandleByVal method).
3717  // Then, here, we initialize the stack frame with
3718  // "store-reg" instructions.
3719  // Case #2. Var-args function that doesn't contain byval parameters.
3720  // The same: eat all remaining unallocated registers,
3721  // initialize the stack frame.
3722 
3723  MachineFunction &MF = DAG.getMachineFunction();
3724  MachineFrameInfo &MFI = MF.getFrameInfo();
3725  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3726  unsigned RBegin, REnd;
3727  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3728  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3729  } else {
3730  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3731  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3732  REnd = ARM::R4;
3733  }
3734 
3735  if (REnd != RBegin)
3736  ArgOffset = -4 * (ARM::R4 - RBegin);
3737 
3738  auto PtrVT = getPointerTy(DAG.getDataLayout());
3739  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3740  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3741 
3742  SmallVector<SDValue, 4> MemOps;
3743  const TargetRegisterClass *RC =
3744  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3745 
3746  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3747  unsigned VReg = MF.addLiveIn(Reg, RC);
3748  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3749  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3750  MachinePointerInfo(OrigArg, 4 * i));
3751  MemOps.push_back(Store);
3752  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3753  }
3754 
3755  if (!MemOps.empty())
3756  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3757  return FrameIndex;
3758 }
3759 
3760 // Set up the stack frame that the va_list pointer will start from.
3761 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3762  const SDLoc &dl, SDValue &Chain,
3763  unsigned ArgOffset,
3764  unsigned TotalArgRegsSaveSize,
3765  bool ForceMutable) const {
3766  MachineFunction &MF = DAG.getMachineFunction();
3767  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3768 
3769  // Try to store any remaining integer argument regs
3770  // to their spots on the stack so that they may be loaded by dereferencing
3771  // the result of va_next.
3772  // If there are no regs to be stored, just point the address after the last
3773  // argument passed via the stack.
3774  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3775  CCInfo.getInRegsParamsCount(),
3776  CCInfo.getNextStackOffset(),
3777  std::max(4U, TotalArgRegsSaveSize));
3778  AFI->setVarArgsFrameIndex(FrameIndex);
3779 }
3780 
3781 SDValue ARMTargetLowering::LowerFormalArguments(
3782  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3783  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3784  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3785  MachineFunction &MF = DAG.getMachineFunction();
3786  MachineFrameInfo &MFI = MF.getFrameInfo();
3787 
3788  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3789 
3790  // Assign locations to all of the incoming arguments.
3791  SmallVector<CCValAssign, 16> ArgLocs;
3792  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3793  *DAG.getContext());
3794  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3795 
3796  SmallVector<SDValue, 16> ArgValues;
3797  SDValue ArgValue;
3798  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3799  unsigned CurArgIdx = 0;
3800 
3801  // Initially ArgRegsSaveSize is zero.
3802  // Then we increase this value each time we meet a byval parameter.
3803  // We also increase this value in the case of a varargs function.
3804  AFI->setArgRegsSaveSize(0);
3805 
3806  // Calculate the amount of stack space that we need to allocate to store
3807  // byval and variadic arguments that are passed in registers.
3808  // We need to know this before we allocate the first byval or variadic
3809  // argument, as they will be allocated a stack slot below the CFA (Canonical
3810  // Frame Address, the stack pointer at entry to the function).
3811  unsigned ArgRegBegin = ARM::R4;
3812  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3813  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3814  break;
3815 
3816  CCValAssign &VA = ArgLocs[i];
3817  unsigned Index = VA.getValNo();
3818  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3819  if (!Flags.isByVal())
3820  continue;
3821 
3822  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3823  unsigned RBegin, REnd;
3824  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3825  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3826 
3827  CCInfo.nextInRegsParam();
3828  }
3829  CCInfo.rewindByValRegsInfo();
3830 
3831  int lastInsIndex = -1;
3832  if (isVarArg && MFI.hasVAStart()) {
3833  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3834  if (RegIdx != array_lengthof(GPRArgRegs))
3835  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3836  }
3837 
3838  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3839  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3840  auto PtrVT = getPointerTy(DAG.getDataLayout());
3841 
3842  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3843  CCValAssign &VA = ArgLocs[i];
3844  if (Ins[VA.getValNo()].isOrigArg()) {
3845  std::advance(CurOrigArg,
3846  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3847  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3848  }
3849  // Arguments stored in registers.
3850  if (VA.isRegLoc()) {
3851  EVT RegVT = VA.getLocVT();
3852 
3853  if (VA.needsCustom()) {
3854  // f64 and vector types are split up into multiple registers or
3855  // combinations of registers and stack slots.
3856  if (VA.getLocVT() == MVT::v2f64) {
3857  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3858  Chain, DAG, dl);
3859  VA = ArgLocs[++i]; // skip ahead to next loc
3860  SDValue ArgValue2;
3861  if (VA.isMemLoc()) {
3862  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3863  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3864  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3865  MachinePointerInfo::getFixedStack(
3866  DAG.getMachineFunction(), FI));
3867  } else {
3868  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3869  Chain, DAG, dl);
3870  }
3871  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3872  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3873  ArgValue, ArgValue1,
3874  DAG.getIntPtrConstant(0, dl));
3875  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3876  ArgValue, ArgValue2,
3877  DAG.getIntPtrConstant(1, dl));
3878  } else
3879  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3880  } else {
3881  const TargetRegisterClass *RC;
3882 
3883 
3884  if (RegVT == MVT::f16)
3885  RC = &ARM::HPRRegClass;
3886  else if (RegVT == MVT::f32)
3887  RC = &ARM::SPRRegClass;
3888  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3889  RC = &ARM::DPRRegClass;
3890  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3891  RC = &ARM::QPRRegClass;
3892  else if (RegVT == MVT::i32)
3893  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3894  : &ARM::GPRRegClass;
3895  else
3896  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3897 
3898  // Transform the arguments in physical registers into virtual ones.
3899  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3900  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3901  }
3902 
3903  // If this is an 8 or 16-bit value, it is really passed promoted
3904  // to 32 bits. Insert an assert[sz]ext to capture this, then
3905  // truncate to the right size.
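 // For instance, an i8 argument arrives in a full 32-bit register with its
 // upper bits already zero- or sign-extended by the caller; the AssertZext /
 // AssertSext node records that guarantee, and the TRUNCATE then narrows the
 // value back to i8 for the rest of the DAG.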
3906  switch (VA.getLocInfo()) {
3907  default: llvm_unreachable("Unknown loc info!");
3908  case CCValAssign::Full: break;
3909  case CCValAssign::BCvt:
3910  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3911  break;
3912  case CCValAssign::SExt:
3913  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3914  DAG.getValueType(VA.getValVT()));
3915  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3916  break;
3917  case CCValAssign::ZExt:
3918  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3919  DAG.getValueType(VA.getValVT()));
3920  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3921  break;
3922  }
3923 
3924  InVals.push_back(ArgValue);
3925  } else { // VA.isRegLoc()
3926  // sanity check
3927  assert(VA.isMemLoc());
3928  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3929 
3930  int index = VA.getValNo();
3931 
3932  // Some Ins[] entries become multiple ArgLoc[] entries.
3933  // Process them only once.
3934  if (index != lastInsIndex)
3935  {
3936  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3937  // FIXME: For now, all byval parameter objects are marked mutable.
3938  // This can be changed with more analysis.
3939  // In case of tail call optimization, mark all arguments mutable, since
3940  // they could be overwritten by the lowering of arguments in case of a
3941  // tail call.
3942  if (Flags.isByVal()) {
3943  assert(Ins[index].isOrigArg() &&
3944  "Byval arguments cannot be implicit");
3945  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3946 
3947  int FrameIndex = StoreByValRegs(
3948  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3949  VA.getLocMemOffset(), Flags.getByValSize());
3950  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3951  CCInfo.nextInRegsParam();
3952  } else {
3953  unsigned FIOffset = VA.getLocMemOffset();
3954  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3955  FIOffset, true);
3956 
3957  // Create load nodes to retrieve arguments from the stack.
3958  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3959  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3960  MachinePointerInfo::getFixedStack(
3961  DAG.getMachineFunction(), FI)));
3962  }
3963  lastInsIndex = index;
3964  }
3965  }
3966  }
3967 
3968  // varargs
3969  if (isVarArg && MFI.hasVAStart())
3970  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3971  CCInfo.getNextStackOffset(),
3972  TotalArgRegsSaveSize);
3973 
3974  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3975 
3976  return Chain;
3977 }
3978 
3979 /// isFloatingPointZero - Return true if this is +0.0.
3980 static bool isFloatingPointZero(SDValue Op) {
3981  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3982  return CFP->getValueAPF().isPosZero();
3983  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3984  // Maybe this has already been legalized into the constant pool?
3985  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3986  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3987  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3988  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3989  return CFP->getValueAPF().isPosZero();
3990  }
3991  } else if (Op->getOpcode() == ISD::BITCAST &&
3992  Op->getValueType(0) == MVT::f64) {
3993  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3994  // created by LowerConstantFP().
3995  SDValue BitcastOp = Op->getOperand(0);
3996  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3997  isNullConstant(BitcastOp->getOperand(0)))
3998  return true;
3999  }
4000  return false;
4001 }
4002 
4003 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4004 /// the given operands.
4005 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4006  SDValue &ARMcc, SelectionDAG &DAG,
4007  const SDLoc &dl) const {
4008  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4009  unsigned C = RHSC->getZExtValue();
4010  if (!isLegalICmpImmediate((int32_t)C)) {
4011  // Constant does not fit, try adjusting it by one.
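 // For instance (hypothetical values): "x < 0x1001" cannot use 0x1001 directly
 // because it is not a valid ARM modified immediate, but it is equivalent to
 // "x <= 0x1000", and 0x1000 is encodable, so SETLT becomes SETLE with C - 1.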
4012  switch (CC) {
4013  default: break;
4014  case ISD::SETLT:
4015  case ISD::SETGE:
4016  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4017  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4018  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4019  }
4020  break;
4021  case ISD::SETULT:
4022  case ISD::SETUGE:
4023  if (C != 0 && isLegalICmpImmediate(C-1)) {
4024  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4025  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4026  }
4027  break;
4028  case ISD::SETLE:
4029  case ISD::SETGT:
4030  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4031  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4032  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4033  }
4034  break;
4035  case ISD::SETULE:
4036  case ISD::SETUGT:
4037  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4038  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4039  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4040  }
4041  break;
4042  }
4043  }
4044  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4045  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4046  // In ARM and Thumb-2, the compare instructions can shift their second
4047  // operand.
4048  CC = ISD::getSetCCSwappedOperands(CC);
4049  std::swap(LHS, RHS);
4050  }
4051 
4052  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4053 
4054  // If the RHS is a constant zero then the V (overflow) flag will never be
4055  // set. This can allow us to simplify GE to PL or LT to MI, which can be
4056  // simpler for other passes (like the peephole optimiser) to deal with.
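 // For example, for "x >= 0" the comparison is against zero, so V is known to
 // be clear and GE (N == V) behaves the same as PL (N clear); similarly LT
 // behaves the same as MI.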
4057  if (isNullConstant(RHS)) {
4058  switch (CondCode) {
4059  default: break;
4060