ARMISelLowering.cpp (LLVM 10.0.0svn)
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
26 #include "Utils/ARMBaseInfo.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/BitVector.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringExtras.h"
37 #include "llvm/ADT/StringRef.h"
38 #include "llvm/ADT/StringSwitch.h"
39 #include "llvm/ADT/Triple.h"
40 #include "llvm/ADT/Twine.h"
64 #include "llvm/IR/Attributes.h"
65 #include "llvm/IR/CallingConv.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/Constants.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Function.h"
72 #include "llvm/IR/GlobalAlias.h"
73 #include "llvm/IR/GlobalValue.h"
74 #include "llvm/IR/GlobalVariable.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InlineAsm.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/PatternMatch.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/IR/Value.h"
86 #include "llvm/MC/MCInstrDesc.h"
88 #include "llvm/MC/MCRegisterInfo.h"
89 #include "llvm/MC/MCSchedule.h"
92 #include "llvm/Support/Casting.h"
93 #include "llvm/Support/CodeGen.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 using namespace llvm::PatternMatch;
117 
118 #define DEBUG_TYPE "arm-isel"
119 
120 STATISTIC(NumTailCalls, "Number of tail calls");
121 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
122 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
123 STATISTIC(NumConstpoolPromoted,
124  "Number of constants with their storage promoted into constant pools");
125 
126 static cl::opt<bool>
127 ARMInterworking("arm-interworking", cl::Hidden,
128  cl::desc("Enable / disable ARM interworking (for debugging only)"),
129  cl::init(true));
130 
131 static cl::opt<bool> EnableConstpoolPromotion(
132  "arm-promote-constant", cl::Hidden,
133  cl::desc("Enable / disable promotion of unnamed_addr constants into "
134  "constant pools"),
135  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
136 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
137  "arm-promote-constant-max-size", cl::Hidden,
138  cl::desc("Maximum size of constant to promote into a constant pool"),
139  cl::init(64));
140 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
141  "arm-promote-constant-max-total", cl::Hidden,
142  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
143  cl::init(128));
144 
145 // The APCS parameter registers.
146 static const MCPhysReg GPRArgRegs[] = {
147  ARM::R0, ARM::R1, ARM::R2, ARM::R3
148 };
149 
150 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
151  MVT PromotedBitwiseVT) {
152  if (VT != PromotedLdStVT) {
153  setOperationAction(ISD::LOAD, VT, Promote);
154  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
155 
156  setOperationAction(ISD::STORE, VT, Promote);
157  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
158  }
159 
160  MVT ElemTy = VT.getVectorElementType();
161  if (ElemTy != MVT::f64)
162  setOperationAction(ISD::SETCC, VT, Custom);
163  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
164  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
165  if (ElemTy == MVT::i32) {
166  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
167  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
168  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
169  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
170  } else {
171  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
172  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
173  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
174  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
175  }
176  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
177  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
178  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
179  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
180  setOperationAction(ISD::SELECT, VT, Expand);
181  setOperationAction(ISD::SELECT_CC, VT, Expand);
182  setOperationAction(ISD::VSELECT, VT, Expand);
183  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
184  if (VT.isInteger()) {
185  setOperationAction(ISD::SHL, VT, Custom);
186  setOperationAction(ISD::SRA, VT, Custom);
187  setOperationAction(ISD::SRL, VT, Custom);
188  }
189 
190  // Promote all bit-wise operations.
191  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  setOperationAction(ISD::AND, VT, Promote);
193  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  setOperationAction(ISD::OR, VT, Promote);
195  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  setOperationAction(ISD::XOR, VT, Promote);
197  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
198  }
199 
200  // Neon does not support vector divide/remainder operations.
201  setOperationAction(ISD::SDIV, VT, Expand);
202  setOperationAction(ISD::UDIV, VT, Expand);
203  setOperationAction(ISD::FDIV, VT, Expand);
204  setOperationAction(ISD::SREM, VT, Expand);
205  setOperationAction(ISD::UREM, VT, Expand);
206  setOperationAction(ISD::FREM, VT, Expand);
207 
208  if (!VT.isFloatingPoint() &&
209  VT != MVT::v2i64 && VT != MVT::v1i64)
210  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211  setOperationAction(Opcode, VT, Legal);
212 }
213 
214 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
215  addRegisterClass(VT, &ARM::DPRRegClass);
216  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
217 }
218 
219 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
220  addRegisterClass(VT, &ARM::DPairRegClass);
221  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
222 }
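// Illustrative note (added for exposition, not in the original source): with
// the mappings above, a D-register type such as v8i8 has its loads and stores
// promoted to f64 and its bitwise ops promoted to v2i32, so an `and <8 x i8>`
// is selected as the v2i32 VAND while the data movement uses 64-bit D-register
// accesses; Q-register types promote to v2f64 / v4i32 in the same way.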
223 
224 void ARMTargetLowering::setAllExpand(MVT VT) {
225  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
226  setOperationAction(Opc, VT, Expand);
227 
228  // We support these really simple operations even on types where all
229  // the actual arithmetic has to be broken down into simpler
230  // operations or turned into library calls.
231  setOperationAction(ISD::BITCAST, VT, Legal);
232  setOperationAction(ISD::LOAD, VT, Legal);
233  setOperationAction(ISD::STORE, VT, Legal);
234  setOperationAction(ISD::UNDEF, VT, Legal);
235 }
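// Illustrative note (added for exposition): after setAllExpand(MVT::v2f64),
// for example, an FADD on v2f64 is broken down by legalization (scalarized to
// f64 operations or library calls), while bitcast, load, store and undef of
// v2f64 remain legal, matching the comment above.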
236 
237 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
238  LegalizeAction Action) {
239  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
240  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
241  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
242 }
243 
244 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
245  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
246 
247  for (auto VT : IntTypes) {
248  addRegisterClass(VT, &ARM::QPRRegClass);
249  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
250  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
251  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
252  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
253  setOperationAction(ISD::SHL, VT, Custom);
254  setOperationAction(ISD::SRA, VT, Custom);
255  setOperationAction(ISD::SRL, VT, Custom);
256  setOperationAction(ISD::SMIN, VT, Legal);
257  setOperationAction(ISD::SMAX, VT, Legal);
258  setOperationAction(ISD::UMIN, VT, Legal);
259  setOperationAction(ISD::UMAX, VT, Legal);
260  setOperationAction(ISD::ABS, VT, Legal);
261  setOperationAction(ISD::SETCC, VT, Custom);
262 
263  // No native support for these.
264  setOperationAction(ISD::UDIV, VT, Expand);
265  setOperationAction(ISD::SDIV, VT, Expand);
266  setOperationAction(ISD::UREM, VT, Expand);
267  setOperationAction(ISD::SREM, VT, Expand);
268  setOperationAction(ISD::CTPOP, VT, Expand);
269 
270  if (!HasMVEFP) {
271  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
272  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
273  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
274  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
275  }
276 
277  // Pre and Post inc are supported on loads and stores
278  for (unsigned im = (unsigned)ISD::PRE_INC;
279  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
280  setIndexedLoadAction(im, VT, Legal);
281  setIndexedStoreAction(im, VT, Legal);
282  }
283  }
284 
285  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
286  for (auto VT : FloatTypes) {
287  addRegisterClass(VT, &ARM::QPRRegClass);
288  if (!HasMVEFP)
289  setAllExpand(VT);
290 
291  // These are legal or custom whether we have MVE.fp or not
292  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
293  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
294  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
295  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
296  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
297  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
298  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
299  setOperationAction(ISD::SETCC, VT, Custom);
300 
301  // Pre and Post inc are supported on loads and stores
302  for (unsigned im = (unsigned)ISD::PRE_INC;
303  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
304  setIndexedLoadAction(im, VT, Legal);
305  setIndexedStoreAction(im, VT, Legal);
306  }
307 
308  if (HasMVEFP) {
309  setOperationAction(ISD::FMINNUM, VT, Legal);
310  setOperationAction(ISD::FMAXNUM, VT, Legal);
311  setOperationAction(ISD::FROUND, VT, Legal);
312 
313  // No native support for these.
314  setOperationAction(ISD::FDIV, VT, Expand);
315  setOperationAction(ISD::FREM, VT, Expand);
316  setOperationAction(ISD::FSQRT, VT, Expand);
317  setOperationAction(ISD::FSIN, VT, Expand);
318  setOperationAction(ISD::FCOS, VT, Expand);
319  setOperationAction(ISD::FPOW, VT, Expand);
320  setOperationAction(ISD::FLOG, VT, Expand);
321  setOperationAction(ISD::FLOG2, VT, Expand);
322  setOperationAction(ISD::FLOG10, VT, Expand);
323  setOperationAction(ISD::FEXP, VT, Expand);
324  setOperationAction(ISD::FEXP2, VT, Expand);
325  setOperationAction(ISD::FNEARBYINT, VT, Expand);
326  }
327  }
328 
329  // We 'support' these types up to bitcast/load/store level, regardless of
330  // MVE integer-only / float support. Only FP data processing on the FP
331  // vector types is inhibited at the integer-only level.
332  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
333  for (auto VT : LongTypes) {
334  addRegisterClass(VT, &ARM::QPRRegClass);
335  setAllExpand(VT);
336  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
337  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
338  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
339  }
340  // We can do bitwise operations on v2i64 vectors
341  setOperationAction(ISD::AND, MVT::v2i64, Legal);
342  setOperationAction(ISD::OR, MVT::v2i64, Legal);
343  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
344 
345  // It is legal to extload from v8i8 to v8i16, and from v4i8 or v4i16 to v4i32.
346  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
347  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
348  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
349 
350  // Some truncating stores are legal too.
351  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
352  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
353  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
354 
355  // Pre and Post inc on these are legal, given the correct extends
356  for (unsigned im = (unsigned)ISD::PRE_INC;
357  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
358  setIndexedLoadAction(im, MVT::v8i8, Legal);
359  setIndexedStoreAction(im, MVT::v8i8, Legal);
360  setIndexedLoadAction(im, MVT::v4i8, Legal);
361  setIndexedStoreAction(im, MVT::v4i8, Legal);
362  setIndexedLoadAction(im, MVT::v4i16, Legal);
363  setIndexedStoreAction(im, MVT::v4i16, Legal);
364  }
365 
366  // Predicate types
367  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
368  for (auto VT : pTypes) {
369  addRegisterClass(VT, &ARM::VCCRRegClass);
370  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
371  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
372  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
373  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
374  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
375  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
376  setOperationAction(ISD::SETCC, VT, Custom);
377  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
378  }
379 }
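// Illustrative note (added for exposition): marking ISD::PRE_INC/ISD::POST_INC
// indexed loads and stores Legal above allows DAGCombiner to fold a pointer
// update into the memory access itself, e.g. a vector load followed by an add
// of the access size to the base pointer can become a single post-incremented
// load instead of two instructions.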
380 
381 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
382  const ARMSubtarget &STI)
383  : TargetLowering(TM), Subtarget(&STI) {
384  RegInfo = Subtarget->getRegisterInfo();
385  Itins = Subtarget->getInstrItineraryData();
386 
389 
390  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
391  !Subtarget->isTargetWatchOS()) {
392  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
393  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
394  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
395  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
396  : CallingConv::ARM_AAPCS);
397  }
398 
399  if (Subtarget->isTargetMachO()) {
400  // Uses VFP for Thumb libfuncs if available.
401  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
402  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
403  static const struct {
404  const RTLIB::Libcall Op;
405  const char * const Name;
406  const ISD::CondCode Cond;
407  } LibraryCalls[] = {
408  // Single-precision floating-point arithmetic.
409  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
410  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
411  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
412  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
413 
414  // Double-precision floating-point arithmetic.
415  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
416  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
417  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
418  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
419 
420  // Single-precision comparisons.
421  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
422  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
423  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
424  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
425  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
426  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
427  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
428  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
429 
430  // Double-precision comparisons.
431  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
432  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
433  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
434  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
435  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
436  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
437  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
438  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
439 
440  // Floating-point to integer conversions.
441  // i64 conversions are done via library routines even when generating VFP
442  // instructions, so use the same ones.
443  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
444  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
445  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
446  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
447 
448  // Conversions between floating types.
449  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
450  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
451 
452  // Integer to floating-point conversions.
453  // i64 conversions are done via library routines even when generating VFP
454  // instructions, so use the same ones.
455  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
456  // e.g., __floatunsidf vs. __floatunssidfvfp.
457  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
458  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
459  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
460  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
461  };
462 
463  for (const auto &LC : LibraryCalls) {
464  setLibcallName(LC.Op, LC.Name);
465  if (LC.Cond != ISD::SETCC_INVALID)
466  setCmpLibcallCC(LC.Op, LC.Cond);
467  }
468  }
469  }
470 
471  // These libcalls are not available in 32-bit.
472  setLibcallName(RTLIB::SHL_I128, nullptr);
473  setLibcallName(RTLIB::SRL_I128, nullptr);
474  setLibcallName(RTLIB::SRA_I128, nullptr);
475 
476  // RTLIB
477  if (Subtarget->isAAPCS_ABI() &&
478  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
479  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
480  static const struct {
481  const RTLIB::Libcall Op;
482  const char * const Name;
483  const CallingConv::ID CC;
484  const ISD::CondCode Cond;
485  } LibraryCalls[] = {
486  // Double-precision floating-point arithmetic helper functions
487  // RTABI chapter 4.1.2, Table 2
488  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
489  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
490  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
491  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
492 
493  // Double-precision floating-point comparison helper functions
494  // RTABI chapter 4.1.2, Table 3
495  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
496  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
497  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
498  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
499  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
500  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
501  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
502  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
503 
504  // Single-precision floating-point arithmetic helper functions
505  // RTABI chapter 4.1.2, Table 4
506  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
507  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
508  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
509  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
510 
511  // Single-precision floating-point comparison helper functions
512  // RTABI chapter 4.1.2, Table 5
513  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
514  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
515  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
516  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
517  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
518  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
519  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
520  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
521 
522  // Floating-point to integer conversions.
523  // RTABI chapter 4.1.2, Table 6
524  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
525  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
526  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
527  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
528  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
529  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
530  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
531  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
532 
533  // Conversions between floating types.
534  // RTABI chapter 4.1.2, Table 7
535  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
536  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
537  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
538 
539  // Integer to floating-point conversions.
540  // RTABI chapter 4.1.2, Table 8
541  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
542  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
543  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
544  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
545  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
546  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
547  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
548  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
549 
550  // Long long helper functions
551  // RTABI chapter 4.2, Table 9
552  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
553  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
554  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
555  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
556 
557  // Integer division functions
558  // RTABI chapter 4.3.1
559  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
560  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
561  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
562  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
563  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
564  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
565  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
566  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
567  };
568 
569  for (const auto &LC : LibraryCalls) {
570  setLibcallName(LC.Op, LC.Name);
571  setLibcallCallingConv(LC.Op, LC.CC);
572  if (LC.Cond != ISD::SETCC_INVALID)
573  setCmpLibcallCC(LC.Op, LC.Cond);
574  }
575 
576  // EABI dependent RTLIB
577  if (TM.Options.EABIVersion == EABI::EABI4 ||
578  TM.Options.EABIVersion == EABI::EABI5) {
579  static const struct {
580  const RTLIB::Libcall Op;
581  const char *const Name;
582  const CallingConv::ID CC;
583  const ISD::CondCode Cond;
584  } MemOpsLibraryCalls[] = {
585  // Memory operations
586  // RTABI chapter 4.3.4
587  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
588  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
589  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
590  };
591 
592  for (const auto &LC : MemOpsLibraryCalls) {
593  setLibcallName(LC.Op, LC.Name);
594  setLibcallCallingConv(LC.Op, LC.CC);
595  if (LC.Cond != ISD::SETCC_INVALID)
596  setCmpLibcallCC(LC.Op, LC.Cond);
597  }
598  }
599  }
600 
601  if (Subtarget->isTargetWindows()) {
602  static const struct {
603  const RTLIB::Libcall Op;
604  const char * const Name;
605  const CallingConv::ID CC;
606  } LibraryCalls[] = {
607  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
608  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
609  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
610  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
611  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
612  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
613  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
614  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
615  };
616 
617  for (const auto &LC : LibraryCalls) {
618  setLibcallName(LC.Op, LC.Name);
619  setLibcallCallingConv(LC.Op, LC.CC);
620  }
621  }
622 
623  // Use divmod compiler-rt calls for iOS 5.0 and later.
624  if (Subtarget->isTargetMachO() &&
625  !(Subtarget->isTargetIOS() &&
626  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
627  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
628  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
629  }
630 
631  // The half <-> float conversion functions are always soft-float on
632  // non-watchos platforms, but are needed for some targets which use a
633  // hard-float calling convention by default.
634  if (!Subtarget->isTargetWatchABI()) {
635  if (Subtarget->isAAPCS_ABI()) {
636  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
637  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
638  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
639  } else {
640  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
641  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
642  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
643  }
644  }
645 
646  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
647  // a __gnu_ prefix (which is the default).
648  if (Subtarget->isTargetAEABI()) {
649  static const struct {
650  const RTLIB::Libcall Op;
651  const char * const Name;
652  const CallingConv::ID CC;
653  } LibraryCalls[] = {
654  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
655  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
656  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
657  };
658 
659  for (const auto &LC : LibraryCalls) {
660  setLibcallName(LC.Op, LC.Name);
661  setLibcallCallingConv(LC.Op, LC.CC);
662  }
663  }
664 
665  if (Subtarget->isThumb1Only())
666  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
667  else
668  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
669 
670  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
671  Subtarget->hasFPRegs()) {
672  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
673  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
674  if (!Subtarget->hasVFP2Base())
675  setAllExpand(MVT::f32);
676  if (!Subtarget->hasFP64())
677  setAllExpand(MVT::f64);
678  }
679 
680  if (Subtarget->hasFullFP16()) {
681  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
685 
688  }
689 
690  for (MVT VT : MVT::vector_valuetypes()) {
691  for (MVT InnerVT : MVT::vector_valuetypes()) {
692  setTruncStoreAction(VT, InnerVT, Expand);
693  addAllExtLoads(VT, InnerVT, Expand);
694  }
695 
700 
702  }
703 
706 
709 
710  if (Subtarget->hasMVEIntegerOps())
711  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
712 
713  // Combine low-overhead loop intrinsics so that we can lower i1 types.
714  if (Subtarget->hasLOB()) {
717  }
718 
719  if (Subtarget->hasNEON()) {
720  addDRTypeForNEON(MVT::v2f32);
721  addDRTypeForNEON(MVT::v8i8);
722  addDRTypeForNEON(MVT::v4i16);
723  addDRTypeForNEON(MVT::v2i32);
724  addDRTypeForNEON(MVT::v1i64);
725 
726  addQRTypeForNEON(MVT::v4f32);
727  addQRTypeForNEON(MVT::v2f64);
728  addQRTypeForNEON(MVT::v16i8);
729  addQRTypeForNEON(MVT::v8i16);
730  addQRTypeForNEON(MVT::v4i32);
731  addQRTypeForNEON(MVT::v2i64);
732 
733  if (Subtarget->hasFullFP16()) {
734  addQRTypeForNEON(MVT::v8f16);
735  addDRTypeForNEON(MVT::v4f16);
736  }
737  }
738 
739  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
740  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
741  // none of Neon, MVE or VFP supports any arithmetic operations on it.
745  // FIXME: Code duplication: FDIV and FREM are expanded always, see
746  // ARMTargetLowering::addTypeForNEON method for details.
749  // FIXME: Create unittest.
750  // In other words, find a case where "copysign" appears in the DAG with vector
751  // operands.
753  // FIXME: Code duplication: SETCC has custom operation action, see
754  // ARMTargetLowering::addTypeForNEON method for details.
756  // FIXME: Create unittest for FNEG and for FABS.
768  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
775  }
776 
777  if (Subtarget->hasNEON()) {
778  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
779  // supported for v4f32.
794 
795  // Mark v2f32 intrinsics.
810 
811  // Neon does not support some operations on v1i64 and v2i64 types.
813  // Custom handling for some quad-vector types to detect VMULL.
817  // Custom handling for some vector types to avoid expensive expansions
822  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
823  // a destination type that is wider than the source, nor does
824  // it have a FP_TO_[SU]INT instruction with a narrower destination than
825  // source.
834 
837 
838  // NEON does not have single instruction CTPOP for vectors with element
839  // types wider than 8 bits. However, custom lowering can leverage the
840  // v8i8/v16i8 vcnt instruction.
847 
850 
851  // NEON does not have single instruction CTTZ for vectors.
856 
861 
866 
871 
872  // NEON only has FMA instructions as of VFP4.
873  if (!Subtarget->hasVFP4Base()) {
876  }
877 
892 
893  // It is legal to extload from v4i8 to v4i16 or v4i32.
895  MVT::v2i32}) {
896  for (MVT VT : MVT::integer_vector_valuetypes()) {
900  }
901  }
902  }
903 
904  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
908  }
909 
910  if (!Subtarget->hasFP64()) {
911  // When targeting a floating-point unit with only single-precision
912  // operations, f64 is legal for the few double-precision instructions which
913  // are present. However, no double-precision operations other than moves,
914  // loads and stores are provided by the hardware.
946  }
947 
948  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
950  if (Subtarget->hasFullFP16())
952  }
953 
954  if (!Subtarget->hasFP16())
956 
957  if (!Subtarget->hasFP64())
959 
961 
962  // ARM does not have floating-point extending loads.
963  for (MVT VT : MVT::fp_valuetypes()) {
966  }
967 
968  // ... or truncating stores
972 
973  // ARM does not have i1 sign extending load.
974  for (MVT VT : MVT::integer_valuetypes())
976 
977  // ARM supports all 4 flavors of integer indexed load / store.
978  if (!Subtarget->isThumb1Only()) {
979  for (unsigned im = (unsigned)ISD::PRE_INC;
989  }
990  } else {
991  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
994  }
995 
1000 
1003 
1004  // i64 operation support.
1007  if (Subtarget->isThumb1Only()) {
1010  }
1011  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1012  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1014 
1022 
1023  // MVE lowers 64-bit shifts to lsll and lsrl,
1024  // assuming that ISD::SRL and SRA of i64 are already marked custom.
1025  if (Subtarget->hasMVEIntegerOps())
1027 
1028  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1029  if (Subtarget->isThumb1Only()) {
1033  }
1034 
1035  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1037 
1038  // ARM does not have ROTL.
1040  for (MVT VT : MVT::vector_valuetypes()) {
1043  }
1046  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1049  }
1050 
1051  // @llvm.readcyclecounter requires the Performance Monitors extension.
1052  // Default to the 0 expansion on unsupported platforms.
1053  // FIXME: Technically there are older ARM CPUs that have
1054  // implementation-specific ways of obtaining this information.
1055  if (Subtarget->hasPerfMon())
1057 
1058  // Only ARMv6 has BSWAP.
1059  if (!Subtarget->hasV6Ops())
1061 
1062  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1063  : Subtarget->hasDivideInARMMode();
1064  if (!hasDivide) {
1065  // These are expanded into libcalls if the cpu doesn't have HW divider.
1068  }
1069 
1070  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1073 
1076  }
1077 
1080 
1081  // Register based DivRem for AEABI (RTABI 4.2)
1082  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1083  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1084  Subtarget->isTargetWindows()) {
1087  HasStandaloneRem = false;
1088 
1089  if (Subtarget->isTargetWindows()) {
1090  const struct {
1091  const RTLIB::Libcall Op;
1092  const char * const Name;
1093  const CallingConv::ID CC;
1094  } LibraryCalls[] = {
1095  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1096  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1097  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1098  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1099 
1100  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1101  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1102  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1103  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1104  };
1105 
1106  for (const auto &LC : LibraryCalls) {
1107  setLibcallName(LC.Op, LC.Name);
1108  setLibcallCallingConv(LC.Op, LC.CC);
1109  }
1110  } else {
1111  const struct {
1112  const RTLIB::Libcall Op;
1113  const char * const Name;
1114  const CallingConv::ID CC;
1115  } LibraryCalls[] = {
1116  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1117  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1118  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1119  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1120 
1121  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1122  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1123  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1124  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1125  };
1126 
1127  for (const auto &LC : LibraryCalls) {
1128  setLibcallName(LC.Op, LC.Name);
1129  setLibcallCallingConv(LC.Op, LC.CC);
1130  }
1131  }
1132 
1137  } else {
1140  }
1141 
1142  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
1143  for (auto &VT : {MVT::f32, MVT::f64})
1145 
1150 
1153 
1154  // Use the default implementation.
1161 
1162  if (Subtarget->isTargetWindows())
1164  else
1166 
1167  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1168  // the default expansion.
1169  InsertFencesForAtomic = false;
1170  if (Subtarget->hasAnyDataBarrier() &&
1171  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1172  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1173  // to ldrex/strex loops already.
1175  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1177 
1178  // On v8, we have particularly efficient implementations of atomic fences
1179  // if they can be combined with nearby atomic loads and stores.
1180  if (!Subtarget->hasAcquireRelease() ||
1181  getTargetMachine().getOptLevel() == 0) {
1182  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1183  InsertFencesForAtomic = true;
1184  }
1185  } else {
1186  // If there's anything we can use as a barrier, go through custom lowering
1187  // for ATOMIC_FENCE.
1188  // If target has DMB in thumb, Fences can be inserted.
1189  if (Subtarget->hasDataBarrier())
1190  InsertFencesForAtomic = true;
1191 
1193  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1194 
1195  // Set them all for expansion, which will force libcalls.
1208  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1209  // Unordered/Monotonic case.
1210  if (!InsertFencesForAtomic) {
1213  }
1214  }
1215 
1217 
1218  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1219  if (!Subtarget->hasV6Ops()) {
1222  }
1224 
1225  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1226  !Subtarget->isThumb1Only()) {
1227  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1228  // iff target supports vfp2.
1231  }
1232 
1233  // We want to custom lower some of our intrinsics.
1238  if (Subtarget->useSjLjEH())
1239  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1240 
1250  if (Subtarget->hasFullFP16()) {
1254  }
1255 
1257 
1260  if (Subtarget->hasFullFP16())
1265 
1266  // We don't support sin/cos/fmod/copysign/pow
1275  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1276  !Subtarget->isThumb1Only()) {
1279  }
1282 
1283  if (!Subtarget->hasVFP4Base()) {
1286  }
1287 
1288  // Various VFP goodness
1289  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1290  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1291  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1294  }
1295 
1296  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1297  if (!Subtarget->hasFP16()) {
1300  }
1301  }
1302 
1303  // Use __sincos_stret if available.
1304  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1305  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1308  }
1309 
1310  // FP-ARMv8 implements a lot of rounding-like FP operations.
1311  if (Subtarget->hasFPARMv8Base()) {
1320  if (Subtarget->hasNEON()) {
1325  }
1326 
1327  if (Subtarget->hasFP64()) {
1336  }
1337  }
1338 
1339  // FP16 often needs to be promoted to call lib functions
1340  if (Subtarget->hasFullFP16()) {
1353 
1355  }
1356 
1357  if (Subtarget->hasNEON()) {
1358  // vmin and vmax aren't available in a scalar form, so we use
1359  // a NEON instruction with an undef lane instead.
1368 
1369  if (Subtarget->hasFullFP16()) {
1374 
1379  }
1380  }
1381 
1382  // We have target-specific dag combine patterns for the following nodes:
1383  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1390 
1391  if (Subtarget->hasV6Ops())
1393  if (Subtarget->isThumb1Only())
1395 
1397 
1398  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1399  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1401  else
1403 
1404  //// temporary - rewrite interface to use type
1405  MaxStoresPerMemset = 8;
1407  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1409  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1411 
1412  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1413  // are at least 4 bytes aligned.
1415 
1416  // Prefer likely predicted branches to selects on out-of-order cores.
1417  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1418 
1420 
1421  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1422 
1423  if (Subtarget->isThumb() || Subtarget->isThumb2())
1425 }
1426 
1427 bool ARMTargetLowering::useSoftFloat() const {
1428  return Subtarget->useSoftFloat();
1429 }
1430 
1431 // FIXME: It might make sense to define the representative register class as the
1432 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1433 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1434 // SPR's representative would be DPR_VFP2. This should work well if register
1435 // pressure tracking were modified such that a register use would increment the
1436 // pressure of the register class's representative and all of its super
1437 // classes' representatives transitively. We have not implemented this because
1438 // of the difficulty prior to coalescing of modeling operand register classes
1439 // due to the common occurrence of cross class copies and subregister insertions
1440 // and extractions.
1441 std::pair<const TargetRegisterClass *, uint8_t>
1442 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1443  MVT VT) const {
1444  const TargetRegisterClass *RRC = nullptr;
1445  uint8_t Cost = 1;
1446  switch (VT.SimpleTy) {
1447  default:
1448  return TargetLowering::findRepresentativeClass(TRI, VT);
1449  // Use DPR as representative register class for all floating point
1450 // and vector types. There are 32 SPR registers and 32 DPR registers, so
1451 // the cost is 1 for both f32 and f64.
1452  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1453  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1454  RRC = &ARM::DPRRegClass;
1455  // When NEON is used for SP, only half of the register file is available
1456  // because operations that define both SP and DP results will be constrained
1457  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1458  // coalescing by double-counting the SP regs. See the FIXME above.
1459  if (Subtarget->useNEONForSinglePrecisionFP())
1460  Cost = 2;
1461  break;
1462  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1463  case MVT::v4f32: case MVT::v2f64:
1464  RRC = &ARM::DPRRegClass;
1465  Cost = 2;
1466  break;
1467  case MVT::v4i64:
1468  RRC = &ARM::DPRRegClass;
1469  Cost = 4;
1470  break;
1471  case MVT::v8i64:
1472  RRC = &ARM::DPRRegClass;
1473  Cost = 8;
1474  break;
1475  }
1476  return std::make_pair(RRC, Cost);
1477 }
1478 
1479 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1480  switch ((ARMISD::NodeType)Opcode) {
1481  case ARMISD::FIRST_NUMBER: break;
1482  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1483  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1484  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1485  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1486  case ARMISD::CALL: return "ARMISD::CALL";
1487  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1488  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1489  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1490  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1491  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1492  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1493  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1494  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1495  case ARMISD::CMP: return "ARMISD::CMP";
1496  case ARMISD::CMN: return "ARMISD::CMN";
1497  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1498  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1499  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1500  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1501  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1502 
1503  case ARMISD::CMOV: return "ARMISD::CMOV";
1504  case ARMISD::SUBS: return "ARMISD::SUBS";
1505 
1506  case ARMISD::SSAT: return "ARMISD::SSAT";
1507  case ARMISD::USAT: return "ARMISD::USAT";
1508 
1509  case ARMISD::ASRL: return "ARMISD::ASRL";
1510  case ARMISD::LSRL: return "ARMISD::LSRL";
1511  case ARMISD::LSLL: return "ARMISD::LSLL";
1512 
1513  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1514  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1515  case ARMISD::RRX: return "ARMISD::RRX";
1516 
1517  case ARMISD::ADDC: return "ARMISD::ADDC";
1518  case ARMISD::ADDE: return "ARMISD::ADDE";
1519  case ARMISD::SUBC: return "ARMISD::SUBC";
1520  case ARMISD::SUBE: return "ARMISD::SUBE";
1521  case ARMISD::LSLS: return "ARMISD::LSLS";
1522 
1523  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1524  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1525  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1526  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1527  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1528 
1529  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1530  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1531  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1532 
1533  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1534 
1535  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1536 
1537  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1538 
1539  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1540 
1541  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1542 
1543  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1544  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1545 
1546  case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
1547  case ARMISD::VCMP: return "ARMISD::VCMP";
1548  case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
1549  case ARMISD::VTST: return "ARMISD::VTST";
1550 
1551  case ARMISD::VSHLs: return "ARMISD::VSHLs";
1552  case ARMISD::VSHLu: return "ARMISD::VSHLu";
1553  case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1554  case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1555  case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1556  case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1557  case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1558  case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1559  case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1560  case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1561  case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1562  case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1563  case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1564  case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1565  case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1566  case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1567  case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1568  case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1569  case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1570  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1571  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1572  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1573  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1574  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1575  case ARMISD::VDUP: return "ARMISD::VDUP";
1576  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1577  case ARMISD::VEXT: return "ARMISD::VEXT";
1578  case ARMISD::VREV64: return "ARMISD::VREV64";
1579  case ARMISD::VREV32: return "ARMISD::VREV32";
1580  case ARMISD::VREV16: return "ARMISD::VREV16";
1581  case ARMISD::VZIP: return "ARMISD::VZIP";
1582  case ARMISD::VUZP: return "ARMISD::VUZP";
1583  case ARMISD::VTRN: return "ARMISD::VTRN";
1584  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1585  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1586  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1587  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1588  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1589  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1590  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1591  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1592  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1593  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1594  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1595  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1596  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1597  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1598  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1599  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1600  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1601  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1602  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1603  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1604  case ARMISD::BFI: return "ARMISD::BFI";
1605  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1606  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1607  case ARMISD::VBSL: return "ARMISD::VBSL";
1608  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1609  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1610  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1611  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1612  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1613  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1614  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1615  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1616  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1617  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1618  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1619  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1620  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1621  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1622  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1623  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1624  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1625  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1626  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1627  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1628  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1629  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1630  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1631  case ARMISD::WLS: return "ARMISD::WLS";
1632  case ARMISD::LE: return "ARMISD::LE";
1633  case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
1634  }
1635  return nullptr;
1636 }
1637 
1638 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1639  EVT VT) const {
1640  if (!VT.isVector())
1641  return getPointerTy(DL);
1642 
1643  // MVE has a predicate register.
1644  if (Subtarget->hasMVEIntegerOps() &&
1645  (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
1648 }
1649 
1650 /// getRegClassFor - Return the register class that should be used for the
1651 /// specified value type.
1652 const TargetRegisterClass *
1653 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1654  (void)isDivergent;
1655  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1656  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1657  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1658  // MVE Q registers.
1659  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1660  if (VT == MVT::v4i64)
1661  return &ARM::QQPRRegClass;
1662  if (VT == MVT::v8i64)
1663  return &ARM::QQQQPRRegClass;
1664  }
1665  return TargetLowering::getRegClassFor(VT);
1666 }
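// Illustrative note (added for exposition): v4i64 and v8i64 never become legal
// value types here; they only provide a register class wide enough for a
// REG_SEQUENCE covering 4 or 8 consecutive D registers (2 or 4 Q registers),
// such as the tuple operand of a NEON VLD4/VST4.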
1667 
1668 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1669 // source/dest is aligned and the copy size is large enough. We therefore want
1670 // to align such objects passed to memory intrinsics.
1671 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1672  unsigned &PrefAlign) const {
1673  if (!isa<MemIntrinsic>(CI))
1674  return false;
1675  MinSize = 8;
1676  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1677  // cycle faster than 4-byte aligned LDM.
1678  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1679  return true;
1680 }
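// Sketch (added for exposition; not part of the original file, and the helper
// name below is made up): how a CodeGenPrepare-style caller might consume this
// hook to pick a better alignment for an alloca passed to a memory intrinsic.
static unsigned preferredAlignForMemIntrinsicArg(const TargetLowering &TLI,
                                                 CallInst *CI, AllocaInst *AI,
                                                 const DataLayout &DL) {
  unsigned MinSize = 0, PrefAlign = 0;
  if (!TLI.shouldAlignPointerArgs(CI, MinSize, PrefAlign))
    return AI->getAlignment();
  // Only objects of at least MinSize bytes are worth over-aligning; on ARM
  // that means 8-byte aligning objects of 8+ bytes so the copy can use
  // LDM/STM (or LDRD/STRD) efficiently, per the implementation above.
  if (AI->getAllocatedType()->isSized() &&
      DL.getTypeAllocSize(AI->getAllocatedType()) >= MinSize)
    return std::max<unsigned>(AI->getAlignment(), PrefAlign);
  return AI->getAlignment();
}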
1681 
1682 // Create a fast isel object.
1683 FastISel *
1684 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1685  const TargetLibraryInfo *libInfo) const {
1686  return ARM::createFastISel(funcInfo, libInfo);
1687 }
1688 
1689 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1690  unsigned NumVals = N->getNumValues();
1691  if (!NumVals)
1692  return Sched::RegPressure;
1693 
1694  for (unsigned i = 0; i != NumVals; ++i) {
1695  EVT VT = N->getValueType(i);
1696  if (VT == MVT::Glue || VT == MVT::Other)
1697  continue;
1698  if (VT.isFloatingPoint() || VT.isVector())
1699  return Sched::ILP;
1700  }
1701 
1702  if (!N->isMachineOpcode())
1703  return Sched::RegPressure;
1704 
1705  // Loads are scheduled for latency even if the instruction itinerary
1706  // is not available.
1707  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1708  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1709 
1710  if (MCID.getNumDefs() == 0)
1711  return Sched::RegPressure;
1712  if (!Itins->isEmpty() &&
1713  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1714  return Sched::ILP;
1715 
1716  return Sched::RegPressure;
1717 }
1718 
1719 //===----------------------------------------------------------------------===//
1720 // Lowering Code
1721 //===----------------------------------------------------------------------===//
1722 
1723 static bool isSRL16(const SDValue &Op) {
1724  if (Op.getOpcode() != ISD::SRL)
1725  return false;
1726  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1727  return Const->getZExtValue() == 16;
1728  return false;
1729 }
1730 
1731 static bool isSRA16(const SDValue &Op) {
1732  if (Op.getOpcode() != ISD::SRA)
1733  return false;
1734  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1735  return Const->getZExtValue() == 16;
1736  return false;
1737 }
1738 
1739 static bool isSHL16(const SDValue &Op) {
1740  if (Op.getOpcode() != ISD::SHL)
1741  return false;
1742  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1743  return Const->getZExtValue() == 16;
1744  return false;
1745 }
1746 
1747 // Check for a signed 16-bit value. We special case SRA because it makes it
1748 // simpler when also looking for SRAs that aren't sign extending a
1749 // smaller value. Without the check, we'd need to take extra care with
1750 // checking order for some operations.
1751 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1752  if (isSRA16(Op))
1753  return isSHL16(Op.getOperand(0));
1754  return DAG.ComputeNumSignBits(Op) == 17;
1755 }
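// Worked example (added for exposition): for an i32 value x built as
//   t = (shl y, 16); x = (sra t, 16)
// isSRA16(x) and isSHL16(t) both hold, so isS16 returns true without querying
// known bits. Otherwise, ComputeNumSignBits(Op) == 17 means the top 17 bits
// are all copies of the sign bit, i.e. the value is a sign-extended 16-bit
// quantity, which is what 16x16 multiply instructions such as SMULBB expect.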
1756 
1757 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1758 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1759  switch (CC) {
1760  default: llvm_unreachable("Unknown condition code!");
1761  case ISD::SETNE: return ARMCC::NE;
1762  case ISD::SETEQ: return ARMCC::EQ;
1763  case ISD::SETGT: return ARMCC::GT;
1764  case ISD::SETGE: return ARMCC::GE;
1765  case ISD::SETLT: return ARMCC::LT;
1766  case ISD::SETLE: return ARMCC::LE;
1767  case ISD::SETUGT: return ARMCC::HI;
1768  case ISD::SETUGE: return ARMCC::HS;
1769  case ISD::SETULT: return ARMCC::LO;
1770  case ISD::SETULE: return ARMCC::LS;
1771  }
1772 }
1773 
1774 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1775 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1776  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1777  CondCode2 = ARMCC::AL;
1778  InvalidOnQNaN = true;
1779  switch (CC) {
1780  default: llvm_unreachable("Unknown FP condition!");
1781  case ISD::SETEQ:
1782  case ISD::SETOEQ:
1783  CondCode = ARMCC::EQ;
1784  InvalidOnQNaN = false;
1785  break;
1786  case ISD::SETGT:
1787  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1788  case ISD::SETGE:
1789  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1790  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1791  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1792  case ISD::SETONE:
1793  CondCode = ARMCC::MI;
1794  CondCode2 = ARMCC::GT;
1795  InvalidOnQNaN = false;
1796  break;
1797  case ISD::SETO: CondCode = ARMCC::VC; break;
1798  case ISD::SETUO: CondCode = ARMCC::VS; break;
1799  case ISD::SETUEQ:
1800  CondCode = ARMCC::EQ;
1801  CondCode2 = ARMCC::VS;
1802  InvalidOnQNaN = false;
1803  break;
1804  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1805  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1806  case ISD::SETLT:
1807  case ISD::SETULT: CondCode = ARMCC::LT; break;
1808  case ISD::SETLE:
1809  case ISD::SETULE: CondCode = ARMCC::LE; break;
1810  case ISD::SETNE:
1811  case ISD::SETUNE:
1812  CondCode = ARMCC::NE;
1813  InvalidOnQNaN = false;
1814  break;
1815  }
1816 }
1817 
1818 //===----------------------------------------------------------------------===//
1819 // Calling Convention Implementation
1820 //===----------------------------------------------------------------------===//
1821 
1822 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1823 /// account presence of floating point hardware and calling convention
1824 /// limitations, such as support for variadic functions.
1825 CallingConv::ID
1826 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1827  bool isVarArg) const {
1828  switch (CC) {
1829  default:
1830  report_fatal_error("Unsupported calling convention");
1831  case CallingConv::ARM_AAPCS:
1832  case CallingConv::ARM_APCS:
1833  case CallingConv::GHC:
1834  return CC;
1835  case CallingConv::PreserveMost:
1836  return CallingConv::PreserveMost;
1837  case CallingConv::ARM_AAPCS_VFP:
1838  case CallingConv::Swift:
1839  return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1840  case CallingConv::C:
1841  if (!Subtarget->isAAPCS_ABI())
1842  return CallingConv::ARM_APCS;
1843  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
1844  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1845  !isVarArg)
1846  return CallingConv::ARM_AAPCS_VFP;
1847  else
1848  return CallingConv::ARM_AAPCS;
1849  case CallingConv::Fast:
1850  case CallingConv::CXX_FAST_TLS:
1851  if (!Subtarget->isAAPCS_ABI()) {
1852  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
1853  return CallingConv::Fast;
1854  return CallingConv::ARM_APCS;
1855  } else if (Subtarget->hasVFP2Base() &&
1856  !Subtarget->isThumb1Only() && !isVarArg)
1857  return CallingConv::ARM_AAPCS_VFP;
1858  else
1859  return CallingConv::ARM_AAPCS;
1860  }
1861 }
1862 
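// For illustration: on an AAPCS target with a VFP2 FPU, a hard-float ABI and a
// non-variadic callee, a plain C call is treated as ARM_AAPCS_VFP so that
// floating-point arguments travel in VFP registers; a variadic call falls back
// to ARM_AAPCS and passes them in GPRs instead.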
1863 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1864  bool isVarArg) const {
1865  return CCAssignFnForNode(CC, false, isVarArg);
1866 }
1867 
1868 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1869  bool isVarArg) const {
1870  return CCAssignFnForNode(CC, true, isVarArg);
1871 }
1872 
1873 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1874 /// CallingConvention.
1875 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1876  bool Return,
1877  bool isVarArg) const {
1878  switch (getEffectiveCallingConv(CC, isVarArg)) {
1879  default:
1880  report_fatal_error("Unsupported calling convention");
1881  case CallingConv::ARM_APCS:
1882  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1883  case CallingConv::ARM_AAPCS:
1884  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1885  case CallingConv::ARM_AAPCS_VFP:
1886  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1887  case CallingConv::Fast:
1888  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1889  case CallingConv::GHC:
1890  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1891  case CallingConv::PreserveMost:
1892  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1893  }
1894 }
1895 
1896 /// LowerCallResult - Lower the result values of a call into the
1897 /// appropriate copies out of appropriate physical registers.
1898 SDValue ARMTargetLowering::LowerCallResult(
1899  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1900  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1901  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1902  SDValue ThisVal) const {
1903  // Assign locations to each value returned by this call.
1904  SmallVector<CCValAssign, 16> RVLocs;
1905  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1906  *DAG.getContext());
1907  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1908 
1909  // Copy all of the result registers out of their specified physreg.
1910  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1911  CCValAssign VA = RVLocs[i];
1912 
1913  // Pass 'this' value directly from the argument to return value, to avoid
1914  // reg unit interference
1915  if (i == 0 && isThisReturn) {
1916  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1917  "unexpected return calling convention register assignment");
1918  InVals.push_back(ThisVal);
1919  continue;
1920  }
1921 
1922  SDValue Val;
1923  if (VA.needsCustom()) {
1924  // Handle f64 or half of a v2f64.
1925  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1926  InFlag);
1927  Chain = Lo.getValue(1);
1928  InFlag = Lo.getValue(2);
1929  VA = RVLocs[++i]; // skip ahead to next loc
1930  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1931  InFlag);
1932  Chain = Hi.getValue(1);
1933  InFlag = Hi.getValue(2);
1934  if (!Subtarget->isLittle())
1935  std::swap (Lo, Hi);
1936  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1937 
1938  if (VA.getLocVT() == MVT::v2f64) {
1939  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1940  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1941  DAG.getConstant(0, dl, MVT::i32));
1942 
1943  VA = RVLocs[++i]; // skip ahead to next loc
1944  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1945  Chain = Lo.getValue(1);
1946  InFlag = Lo.getValue(2);
1947  VA = RVLocs[++i]; // skip ahead to next loc
1948  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1949  Chain = Hi.getValue(1);
1950  InFlag = Hi.getValue(2);
1951  if (!Subtarget->isLittle())
1952  std::swap (Lo, Hi);
1953  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1954  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1955  DAG.getConstant(1, dl, MVT::i32));
1956  }
1957  } else {
1958  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1959  InFlag);
1960  Chain = Val.getValue(1);
1961  InFlag = Val.getValue(2);
1962  }
1963 
1964  switch (VA.getLocInfo()) {
1965  default: llvm_unreachable("Unknown loc info!");
1966  case CCValAssign::Full: break;
1967  case CCValAssign::BCvt:
1968  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1969  break;
1970  }
1971 
1972  InVals.push_back(Val);
1973  }
1974 
1975  return Chain;
1976 }
1977 
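// For illustration: an f64 returned under a soft-float convention comes back
// in a pair of i32 registers (r0/r1 for example). The loop above rebuilds it
// with ARMISD::VMOVDRR, swapping the halves on big-endian targets, and a
// v2f64 result repeats this twice, inserting each rebuilt f64 into the vector.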
1978 /// LowerMemOpCallTo - Store the argument to the stack.
1979 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1980  SDValue Arg, const SDLoc &dl,
1981  SelectionDAG &DAG,
1982  const CCValAssign &VA,
1983  ISD::ArgFlagsTy Flags) const {
1984  unsigned LocMemOffset = VA.getLocMemOffset();
1985  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1986  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1987  StackPtr, PtrOff);
1988  return DAG.getStore(
1989  Chain, dl, Arg, PtrOff,
1990  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1991 }
1992 
1993 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1994  SDValue Chain, SDValue &Arg,
1995  RegsToPassVector &RegsToPass,
1996  CCValAssign &VA, CCValAssign &NextVA,
1997  SDValue &StackPtr,
1998  SmallVectorImpl<SDValue> &MemOpChains,
1999  ISD::ArgFlagsTy Flags) const {
2000  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2001  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2002  unsigned id = Subtarget->isLittle() ? 0 : 1;
2003  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2004 
2005  if (NextVA.isRegLoc())
2006  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2007  else {
2008  assert(NextVA.isMemLoc());
2009  if (!StackPtr.getNode())
2010  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2011  getPointerTy(DAG.getDataLayout()));
2012 
2013  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
2014  dl, DAG, NextVA,
2015  Flags));
2016  }
2017 }
2018 
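// For illustration: PassF64ArgInRegs splits an f64 with ARMISD::VMOVRRD into
// two i32 halves. If both halves land in GPRs (say r2 and r3) they are queued
// in RegsToPass; if the second half spills, it is stored to the stack slot
// described by NextVA instead. Endianness decides which half goes first.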
2019 /// LowerCall - Lower a call into a callseq_start <-
2020 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2021 /// nodes.
2022 SDValue
2023 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2024  SmallVectorImpl<SDValue> &InVals) const {
2025  SelectionDAG &DAG = CLI.DAG;
2026  SDLoc &dl = CLI.DL;
2027  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2028  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2029  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2030  SDValue Chain = CLI.Chain;
2031  SDValue Callee = CLI.Callee;
2032  bool &isTailCall = CLI.IsTailCall;
2033  CallingConv::ID CallConv = CLI.CallConv;
2034  bool doesNotRet = CLI.DoesNotReturn;
2035  bool isVarArg = CLI.IsVarArg;
2036 
2037  MachineFunction &MF = DAG.getMachineFunction();
2038  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2039  bool isThisReturn = false;
2040  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
2041  bool PreferIndirect = false;
2042 
2043  // Disable tail calls if they're not supported.
2044  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
2045  isTailCall = false;
2046 
2047  if (isa<GlobalAddressSDNode>(Callee)) {
2048  // If we're optimizing for minimum size and the function is called three or
2049  // more times in this block, we can improve codesize by calling indirectly
2050  // as BLXr has a 16-bit encoding.
2051  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2052  if (CLI.CS) {
2053  auto *BB = CLI.CS.getParent();
2054  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2055  count_if(GV->users(), [&BB](const User *U) {
2056  return isa<Instruction>(U) &&
2057  cast<Instruction>(U)->getParent() == BB;
2058  }) > 2;
2059  }
2060  }
2061  if (isTailCall) {
2062  // Check if it's really possible to do a tail call.
2063  isTailCall = IsEligibleForTailCallOptimization(
2064  Callee, CallConv, isVarArg, isStructRet,
2065  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2066  PreferIndirect);
2067  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
2068  report_fatal_error("failed to perform tail call elimination on a call "
2069  "site marked musttail");
2070  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2071  // detected sibcalls.
2072  if (isTailCall)
2073  ++NumTailCalls;
2074  }
2075 
2076  // Analyze operands of the call, assigning locations to each operand.
2077  SmallVector<CCValAssign, 16> ArgLocs;
2078  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2079  *DAG.getContext());
2080  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2081 
2082  // Get a count of how many bytes are to be pushed on the stack.
2083  unsigned NumBytes = CCInfo.getNextStackOffset();
2084 
2085  if (isTailCall) {
2086  // For tail calls, memory operands are available in our caller's stack.
2087  NumBytes = 0;
2088  } else {
2089  // Adjust the stack pointer for the new arguments...
2090  // These operations are automatically eliminated by the prolog/epilog pass
2091  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2092  }
2093 
2094  SDValue StackPtr =
2095  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2096 
2097  RegsToPassVector RegsToPass;
2098  SmallVector<SDValue, 8> MemOpChains;
2099 
2100  // Walk the register/memloc assignments, inserting copies/loads. In the case
2101  // of tail call optimization, arguments are handled later.
2102  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2103  i != e;
2104  ++i, ++realArgIdx) {
2105  CCValAssign &VA = ArgLocs[i];
2106  SDValue Arg = OutVals[realArgIdx];
2107  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2108  bool isByVal = Flags.isByVal();
2109 
2110  // Promote the value if needed.
2111  switch (VA.getLocInfo()) {
2112  default: llvm_unreachable("Unknown loc info!");
2113  case CCValAssign::Full: break;
2114  case CCValAssign::SExt:
2115  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2116  break;
2117  case CCValAssign::ZExt:
2118  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2119  break;
2120  case CCValAssign::AExt:
2121  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2122  break;
2123  case CCValAssign::BCvt:
2124  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2125  break;
2126  }
2127 
2128  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2129  if (VA.needsCustom()) {
2130  if (VA.getLocVT() == MVT::v2f64) {
2131  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2132  DAG.getConstant(0, dl, MVT::i32));
2133  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2134  DAG.getConstant(1, dl, MVT::i32));
2135 
2136  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
2137  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2138 
2139  VA = ArgLocs[++i]; // skip ahead to next loc
2140  if (VA.isRegLoc()) {
2141  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
2142  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2143  } else {
2144  assert(VA.isMemLoc());
2145 
2146  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
2147  dl, DAG, VA, Flags));
2148  }
2149  } else {
2150  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2151  StackPtr, MemOpChains, Flags);
2152  }
2153  } else if (VA.isRegLoc()) {
2154  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2155  Outs[0].VT == MVT::i32) {
2156  assert(VA.getLocVT() == MVT::i32 &&
2157  "unexpected calling convention register assignment");
2158  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2159  "unexpected use of 'returned'");
2160  isThisReturn = true;
2161  }
2162  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2163  } else if (isByVal) {
2164  assert(VA.isMemLoc());
2165  unsigned offset = 0;
2166 
2167  // True if this byval aggregate will be split between registers
2168  // and memory.
2169  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2170  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2171 
2172  if (CurByValIdx < ByValArgsCount) {
2173 
2174  unsigned RegBegin, RegEnd;
2175  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2176 
2177  EVT PtrVT =
2178  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2179  unsigned int i, j;
2180  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2181  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2182  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2183  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
2184  MachinePointerInfo(),
2185  DAG.InferPtrAlignment(AddArg));
2186  MemOpChains.push_back(Load.getValue(1));
2187  RegsToPass.push_back(std::make_pair(j, Load));
2188  }
2189 
2190  // If the parameter size exceeds the register area, the "offset" value
2191  // helps us calculate the stack slot for the remaining part properly.
2192  offset = RegEnd - RegBegin;
2193 
2194  CCInfo.nextInRegsParam();
2195  }
2196 
2197  if (Flags.getByValSize() > 4*offset) {
2198  auto PtrVT = getPointerTy(DAG.getDataLayout());
2199  unsigned LocMemOffset = VA.getLocMemOffset();
2200  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2201  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2202  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2203  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2204  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2205  MVT::i32);
2206  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
2207  MVT::i32);
2208 
2209  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2210  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2211  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2212  Ops));
2213  }
2214  } else if (!isTailCall) {
2215  assert(VA.isMemLoc());
2216 
2217  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2218  dl, DAG, VA, Flags));
2219  }
2220  }
2221 
2222  if (!MemOpChains.empty())
2223  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2224 
2225  // Build a sequence of copy-to-reg nodes chained together with token chain
2226  // and flag operands which copy the outgoing args into the appropriate regs.
2227  SDValue InFlag;
2228  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2229  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2230  RegsToPass[i].second, InFlag);
2231  InFlag = Chain.getValue(1);
2232  }
2233 
2234  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2235  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2236  // node so that legalize doesn't hack it.
2237  bool isDirect = false;
2238 
2239  const TargetMachine &TM = getTargetMachine();
2240  const Module *Mod = MF.getFunction().getParent();
2241  const GlobalValue *GV = nullptr;
2242  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2243  GV = G->getGlobal();
2244  bool isStub =
2245  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2246 
2247  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2248  bool isLocalARMFunc = false;
2249  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2250  auto PtrVt = getPointerTy(DAG.getDataLayout());
2251 
2252  if (Subtarget->genLongCalls()) {
2253  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2254  "long-calls codegen is not position independent!");
2255  // Handle a global address or an external symbol. If it's not one of
2256  // those, the target's already in a register, so we don't need to do
2257  // anything extra.
2258  if (isa<GlobalAddressSDNode>(Callee)) {
2259  // Create a constant pool entry for the callee address
2260  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2261  ARMConstantPoolValue *CPV =
2262  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2263 
2264  // Get the address of the callee into a register
2265  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2266  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2267  Callee = DAG.getLoad(
2268  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2270  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2271  const char *Sym = S->getSymbol();
2272 
2273  // Create a constant pool entry for the callee address
2274  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2275  ARMConstantPoolValue *CPV =
2276  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2277  ARMPCLabelIndex, 0);
2278  // Get the address of the callee into a register
2279  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2280  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2281  Callee = DAG.getLoad(
2282  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2284  }
2285  } else if (isa<GlobalAddressSDNode>(Callee)) {
2286  if (!PreferIndirect) {
2287  isDirect = true;
2288  bool isDef = GV->isStrongDefinitionForLinker();
2289 
2290  // ARM call to a local ARM function is predicable.
2291  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2292  // tBX takes a register source operand.
2293  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2294  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2295  Callee = DAG.getNode(
2296  ARMISD::WrapperPIC, dl, PtrVt,
2297  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2298  Callee = DAG.getLoad(
2299  PtrVt, dl, DAG.getEntryNode(), Callee,
2301  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2303  } else if (Subtarget->isTargetCOFF()) {
2304  assert(Subtarget->isTargetWindows() &&
2305  "Windows is the only supported COFF target");
2306  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2307  ? ARMII::MO_DLLIMPORT
2308  : ARMII::MO_NO_FLAG;
2309  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2310  TargetFlags);
2311  if (GV->hasDLLImportStorageClass())
2312  Callee =
2313  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2314  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2316  } else {
2317  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2318  }
2319  }
2320  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2321  isDirect = true;
2322  // tBX takes a register source operand.
2323  const char *Sym = S->getSymbol();
2324  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2325  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2326  ARMConstantPoolValue *CPV =
2327  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2328  ARMPCLabelIndex, 4);
2329  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2330  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2331  Callee = DAG.getLoad(
2332  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2334  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2335  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2336  } else {
2337  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2338  }
2339  }
2340 
2341  // FIXME: handle tail calls differently.
2342  unsigned CallOpc;
2343  if (Subtarget->isThumb()) {
2344  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2345  CallOpc = ARMISD::CALL_NOLINK;
2346  else
2347  CallOpc = ARMISD::CALL;
2348  } else {
2349  if (!isDirect && !Subtarget->hasV5TOps())
2350  CallOpc = ARMISD::CALL_NOLINK;
2351  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2352  // Emit regular call when code size is the priority
2353  !Subtarget->hasMinSize())
2354  // "mov lr, pc; b _foo" to avoid confusing the RSP
2355  CallOpc = ARMISD::CALL_NOLINK;
2356  else
2357  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2358  }
2359 
2360  std::vector<SDValue> Ops;
2361  Ops.push_back(Chain);
2362  Ops.push_back(Callee);
2363 
2364  // Add argument registers to the end of the list so that they are known live
2365  // into the call.
2366  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2367  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2368  RegsToPass[i].second.getValueType()));
2369 
2370  // Add a register mask operand representing the call-preserved registers.
2371  if (!isTailCall) {
2372  const uint32_t *Mask;
2373  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2374  if (isThisReturn) {
2375  // For 'this' returns, use the R0-preserving mask if applicable
2376  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2377  if (!Mask) {
2378  // Set isThisReturn to false if the calling convention is not one that
2379  // allows 'returned' to be modeled in this way, so LowerCallResult does
2380  // not try to pass 'this' straight through
2381  isThisReturn = false;
2382  Mask = ARI->getCallPreservedMask(MF, CallConv);
2383  }
2384  } else
2385  Mask = ARI->getCallPreservedMask(MF, CallConv);
2386 
2387  assert(Mask && "Missing call preserved mask for calling convention");
2388  Ops.push_back(DAG.getRegisterMask(Mask));
2389  }
2390 
2391  if (InFlag.getNode())
2392  Ops.push_back(InFlag);
2393 
2394  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2395  if (isTailCall) {
2396  MF.getFrameInfo().setHasTailCall();
2397  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2398  }
2399 
2400  // Returns a chain and a flag for retval copy to use.
2401  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2402  InFlag = Chain.getValue(1);
2403 
2404  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2405  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2406  if (!Ins.empty())
2407  InFlag = Chain.getValue(1);
2408 
2409  // Handle result values, copying them out of physregs into vregs that we
2410  // return.
2411  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2412  InVals, isThisReturn,
2413  isThisReturn ? OutVals[0] : SDValue());
2414 }
2415 
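// For illustration: for a simple direct call, the sequence built above looks
// roughly like
//   callseq_start -> CopyToReg(arg regs) -> ARMISD::CALL(callee, regmask)
//     -> callseq_end -> CopyFromReg(result regs)
// with tail calls short-circuiting into a single ARMISD::TC_RETURN node, and
// byval aggregates expanded into per-word loads plus an optional
// ARMISD::COPY_STRUCT_BYVAL memcpy-like node for the remainder.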
2416 /// HandleByVal - Every parameter *after* a byval parameter is passed
2417 /// on the stack. Remember the next parameter register to allocate,
2418 /// and then confiscate the rest of the parameter registers to ensure
2419 /// this.
2420 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2421  unsigned Align) const {
2422  // Byval (as with any stack) slots are always at least 4 byte aligned.
2423  Align = std::max(Align, 4U);
2424 
2425  unsigned Reg = State->AllocateReg(GPRArgRegs);
2426  if (!Reg)
2427  return;
2428 
2429  unsigned AlignInRegs = Align / 4;
2430  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2431  for (unsigned i = 0; i < Waste; ++i)
2432  Reg = State->AllocateReg(GPRArgRegs);
2433 
2434  if (!Reg)
2435  return;
2436 
2437  unsigned Excess = 4 * (ARM::R4 - Reg);
2438 
2439  // Special case when NSAA != SP and the parameter size is greater than the
2440  // size of all remaining GPR regs. In that case we can't split the parameter;
2441  // we must send it to the stack. We also must set NCRN to R4, so all
2442  // remaining registers are wasted.
2443  const unsigned NSAAOffset = State->getNextStackOffset();
2444  if (NSAAOffset != 0 && Size > Excess) {
2445  while (State->AllocateReg(GPRArgRegs))
2446  ;
2447  return;
2448  }
2449 
2450  // The first register for the byval parameter is the first register that
2451  // wasn't allocated before this method call, so it would be "reg".
2452  // If the parameter is small enough to be saved in the range [reg, r4), then
2453  // the end (first after last) register would be reg + param-size-in-regs;
2454  // otherwise the parameter would be split between registers and stack, and
2455  // the end register would be r4 in this case.
2456  unsigned ByValRegBegin = Reg;
2457  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2458  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2459  // Note, first register is allocated in the beginning of function already,
2460  // allocate remained amount of registers we need.
2461  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2462  State->AllocateReg(GPRArgRegs);
2463  // A byval parameter that is split between registers and memory needs its
2464  // size truncated here.
2465  // In the case where the entire structure fits in registers, we set the
2466  // size in memory to zero.
2467  Size = std::max<int>(Size - Excess, 0);
2468 }
2469 
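// Worked example (illustrative): suppose r0 is already taken, nothing has been
// pushed to the stack yet (NSAAOffset == 0), Align == 4 and Size == 20 bytes.
// AllocateReg hands out r1, Excess = 4 * (r4 - r1) = 12, so the byval range is
// [r1, r4): 12 bytes travel in r1-r3, those registers are marked allocated,
// and Size is trimmed to 20 - 12 = 8 bytes that remain on the stack.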
2470 /// MatchingStackOffset - Return true if the given stack call argument is
2471 /// already available in the same position (relatively) of the caller's
2472 /// incoming argument stack.
2473 static
2474 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2475  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2476  const TargetInstrInfo *TII) {
2477  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2478  int FI = std::numeric_limits<int>::max();
2479  if (Arg.getOpcode() == ISD::CopyFromReg) {
2480  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2481  if (!Register::isVirtualRegister(VR))
2482  return false;
2483  MachineInstr *Def = MRI->getVRegDef(VR);
2484  if (!Def)
2485  return false;
2486  if (!Flags.isByVal()) {
2487  if (!TII->isLoadFromStackSlot(*Def, FI))
2488  return false;
2489  } else {
2490  return false;
2491  }
2492  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2493  if (Flags.isByVal())
2494  // ByVal argument is passed in as a pointer but it's now being
2495  // dereferenced. e.g.
2496  // define @foo(%struct.X* %A) {
2497  // tail call @bar(%struct.X* byval %A)
2498  // }
2499  return false;
2500  SDValue Ptr = Ld->getBasePtr();
2501  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2502  if (!FINode)
2503  return false;
2504  FI = FINode->getIndex();
2505  } else
2506  return false;
2507 
2509  if (!MFI.isFixedObjectIndex(FI))
2510  return false;
2511  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2512 }
2513 
2514 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2515 /// for tail call optimization. Targets which want to do tail call
2516 /// optimization should implement this function.
2517 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2518  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2519  bool isCalleeStructRet, bool isCallerStructRet,
2520  const SmallVectorImpl<ISD::OutputArg> &Outs,
2521  const SmallVectorImpl<SDValue> &OutVals,
2522  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2523  const bool isIndirect) const {
2524  MachineFunction &MF = DAG.getMachineFunction();
2525  const Function &CallerF = MF.getFunction();
2526  CallingConv::ID CallerCC = CallerF.getCallingConv();
2527 
2528  assert(Subtarget->supportsTailCall());
2529 
2530  // Indirect tail calls cannot be optimized for Thumb1 if the args
2531  // to the call take up r0-r3. The reason is that there are no legal registers
2532  // left to hold the pointer to the function to be called.
2533  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2534  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2535  return false;
2536 
2537  // Look for obvious safe cases to perform tail call optimization that do not
2538  // require ABI changes. This is what gcc calls sibcall.
2539 
2540  // Exception-handling functions need a special set of instructions to indicate
2541  // a return to the hardware. Tail-calling another function would probably
2542  // break this.
2543  if (CallerF.hasFnAttribute("interrupt"))
2544  return false;
2545 
2546  // Also avoid sibcall optimization if either caller or callee uses struct
2547  // return semantics.
2548  if (isCalleeStructRet || isCallerStructRet)
2549  return false;
2550 
2551  // Externally-defined functions with weak linkage should not be
2552  // tail-called on ARM when the OS does not support dynamic
2553  // pre-emption of symbols, as the AAELF spec requires normal calls
2554  // to undefined weak functions to be replaced with a NOP or jump to the
2555  // next instruction. The behaviour of branch instructions in this
2556  // situation (as used for tail calls) is implementation-defined, so we
2557  // cannot rely on the linker replacing the tail call with a return.
2558  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2559  const GlobalValue *GV = G->getGlobal();
2560  const Triple &TT = getTargetMachine().getTargetTriple();
2561  if (GV->hasExternalWeakLinkage() &&
2562  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2563  return false;
2564  }
2565 
2566  // Check that the call results are passed in the same way.
2567  LLVMContext &C = *DAG.getContext();
2568  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2569  CCAssignFnForReturn(CalleeCC, isVarArg),
2570  CCAssignFnForReturn(CallerCC, isVarArg)))
2571  return false;
2572  // The callee has to preserve all registers the caller needs to preserve.
2573  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2574  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2575  if (CalleeCC != CallerCC) {
2576  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2577  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2578  return false;
2579  }
2580 
2581  // If Caller's vararg or byval argument has been split between registers and
2582  // stack, do not perform tail call, since part of the argument is in caller's
2583  // local frame.
2584  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2585  if (AFI_Caller->getArgRegsSaveSize())
2586  return false;
2587 
2588  // If the callee takes no arguments then go on to check the results of the
2589  // call.
2590  if (!Outs.empty()) {
2591  // Check if stack adjustment is needed. For now, do not do this if any
2592  // argument is passed on the stack.
2593  SmallVector<CCValAssign, 16> ArgLocs;
2594  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2595  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2596  if (CCInfo.getNextStackOffset()) {
2597  // Check if the arguments are already laid out in the right way as
2598  // the caller's fixed stack objects.
2599  MachineFrameInfo &MFI = MF.getFrameInfo();
2600  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2601  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2602  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2603  i != e;
2604  ++i, ++realArgIdx) {
2605  CCValAssign &VA = ArgLocs[i];
2606  EVT RegVT = VA.getLocVT();
2607  SDValue Arg = OutVals[realArgIdx];
2608  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2609  if (VA.getLocInfo() == CCValAssign::Indirect)
2610  return false;
2611  if (VA.needsCustom()) {
2612  // f64 and vector types are split into multiple registers or
2613  // register/stack-slot combinations. The types will not match
2614  // the registers; give up on memory f64 refs until we figure
2615  // out what to do about this.
2616  if (!VA.isRegLoc())
2617  return false;
2618  if (!ArgLocs[++i].isRegLoc())
2619  return false;
2620  if (RegVT == MVT::v2f64) {
2621  if (!ArgLocs[++i].isRegLoc())
2622  return false;
2623  if (!ArgLocs[++i].isRegLoc())
2624  return false;
2625  }
2626  } else if (!VA.isRegLoc()) {
2627  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2628  MFI, MRI, TII))
2629  return false;
2630  }
2631  }
2632  }
2633 
2634  const MachineRegisterInfo &MRI = MF.getRegInfo();
2635  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2636  return false;
2637  }
2638 
2639  return true;
2640 }
2641 
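// For illustration: a sibcall is accepted when, among other things, neither
// side uses sret, the caller isn't an interrupt handler, the callee preserves
// at least the caller's callee-saved registers, no byval/vararg argument was
// split across registers and stack, and every stack-passed argument already
// sits at the same fixed offset in the caller's own incoming argument area.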
2642 bool
2643 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2644  MachineFunction &MF, bool isVarArg,
2645  const SmallVectorImpl<ISD::OutputArg> &Outs,
2646  LLVMContext &Context) const {
2647  SmallVector<CCValAssign, 16> RVLocs;
2648  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2649  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2650 }
2651 
2652 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2653  const SDLoc &DL, SelectionDAG &DAG) {
2654  const MachineFunction &MF = DAG.getMachineFunction();
2655  const Function &F = MF.getFunction();
2656 
2657  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2658 
2659  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2660  // version of the "preferred return address". These offsets affect the return
2661  // instruction if this is a return from PL1 without hypervisor extensions.
2662  // IRQ/FIQ: +4 "subs pc, lr, #4"
2663  // SWI: 0 "subs pc, lr, #0"
2664  // ABORT: +4 "subs pc, lr, #4"
2665  // UNDEF: +4/+2 "subs pc, lr, #0"
2666  // UNDEF varies depending on where the exception came from ARM or Thumb
2667  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2668 
2669  int64_t LROffset;
2670  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2671  IntKind == "ABORT")
2672  LROffset = 4;
2673  else if (IntKind == "SWI" || IntKind == "UNDEF")
2674  LROffset = 0;
2675  else
2676  report_fatal_error("Unsupported interrupt attribute. If present, value "
2677  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2678 
2679  RetOps.insert(RetOps.begin() + 1,
2680  DAG.getConstant(LROffset, DL, MVT::i32, false));
2681 
2682  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2683 }
2684 
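// For illustration: a handler marked with the "interrupt" attribute and an
// "IRQ" kind returns via "subs pc, lr, #4", because on IRQ entry LR holds the
// preferred return address plus 4, whereas an "SWI" handler uses an offset
// of 0 and returns with "subs pc, lr, #0".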
2685 SDValue
2686 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2687  bool isVarArg,
2688  const SmallVectorImpl<ISD::OutputArg> &Outs,
2689  const SmallVectorImpl<SDValue> &OutVals,
2690  const SDLoc &dl, SelectionDAG &DAG) const {
2691  // CCValAssign - represent the assignment of the return value to a location.
2692  SmallVector<CCValAssign, 16> RVLocs;
2693 
2694  // CCState - Info about the registers and stack slots.
2695  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2696  *DAG.getContext());
2697 
2698  // Analyze outgoing return values.
2699  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2700 
2701  SDValue Flag;
2702  SmallVector<SDValue, 4> RetOps;
2703  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2704  bool isLittleEndian = Subtarget->isLittle();
2705 
2706  MachineFunction &MF = DAG.getMachineFunction();
2707  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2708  AFI->setReturnRegsCount(RVLocs.size());
2709 
2710  // Copy the result values into the output registers.
2711  for (unsigned i = 0, realRVLocIdx = 0;
2712  i != RVLocs.size();
2713  ++i, ++realRVLocIdx) {
2714  CCValAssign &VA = RVLocs[i];
2715  assert(VA.isRegLoc() && "Can only return in registers!");
2716 
2717  SDValue Arg = OutVals[realRVLocIdx];
2718  bool ReturnF16 = false;
2719 
2720  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2721  // Half-precision return values can be returned like this:
2722  //
2723  // t11 f16 = fadd ...
2724  // t12: i16 = bitcast t11
2725  // t13: i32 = zero_extend t12
2726  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2727  //
2728  // to avoid code generation for bitcasts, we simply set Arg to the node
2729  // that produces the f16 value, t11 in this case.
2730  //
2731  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2732  SDValue ZE = Arg.getOperand(0);
2733  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2734  SDValue BC = ZE.getOperand(0);
2735  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2736  Arg = BC.getOperand(0);
2737  ReturnF16 = true;
2738  }
2739  }
2740  }
2741  }
2742 
2743  switch (VA.getLocInfo()) {
2744  default: llvm_unreachable("Unknown loc info!");
2745  case CCValAssign::Full: break;
2746  case CCValAssign::BCvt:
2747  if (!ReturnF16)
2748  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2749  break;
2750  }
2751 
2752  if (VA.needsCustom()) {
2753  if (VA.getLocVT() == MVT::v2f64) {
2754  // Extract the first half and return it in two registers.
2755  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2756  DAG.getConstant(0, dl, MVT::i32));
2757  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2758  DAG.getVTList(MVT::i32, MVT::i32), Half);
2759 
2760  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2761  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2762  Flag);
2763  Flag = Chain.getValue(1);
2764  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2765  VA = RVLocs[++i]; // skip ahead to next loc
2766  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2767  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2768  Flag);
2769  Flag = Chain.getValue(1);
2770  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2771  VA = RVLocs[++i]; // skip ahead to next loc
2772 
2773  // Extract the 2nd half and fall through to handle it as an f64 value.
2774  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2775  DAG.getConstant(1, dl, MVT::i32));
2776  }
2777  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2778  // available.
2779  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2780  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2781  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2782  fmrrd.getValue(isLittleEndian ? 0 : 1),
2783  Flag);
2784  Flag = Chain.getValue(1);
2785  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2786  VA = RVLocs[++i]; // skip ahead to next loc
2787  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2788  fmrrd.getValue(isLittleEndian ? 1 : 0),
2789  Flag);
2790  } else
2791  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2792 
2793  // Guarantee that all emitted copies are
2794  // stuck together, avoiding something bad.
2795  Flag = Chain.getValue(1);
2796  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2797  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2798  }
2799  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2800  const MCPhysReg *I =
2801  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2802  if (I) {
2803  for (; *I; ++I) {
2804  if (ARM::GPRRegClass.contains(*I))
2805  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2806  else if (ARM::DPRRegClass.contains(*I))
2807  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2808  else
2809  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2810  }
2811  }
2812 
2813  // Update chain and glue.
2814  RetOps[0] = Chain;
2815  if (Flag.getNode())
2816  RetOps.push_back(Flag);
2817 
2818  // CPUs which aren't M-class use a special sequence to return from
2819  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2820  // though we use "subs pc, lr, #N").
2821  //
2822  // M-class CPUs actually use a normal return sequence with a special
2823  // (hardware-provided) value in LR, so the normal code path works.
2824  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2825  !Subtarget->isMClass()) {
2826  if (Subtarget->isThumb1Only())
2827  report_fatal_error("interrupt attribute is not supported in Thumb1");
2828  return LowerInterruptReturn(RetOps, dl, DAG);
2829  }
2830 
2831  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2832 }
2833 
2834 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2835  if (N->getNumValues() != 1)
2836  return false;
2837  if (!N->hasNUsesOfValue(1, 0))
2838  return false;
2839 
2840  SDValue TCChain = Chain;
2841  SDNode *Copy = *N->use_begin();
2842  if (Copy->getOpcode() == ISD::CopyToReg) {
2843  // If the copy has a glue operand, we conservatively assume it isn't safe to
2844  // perform a tail call.
2845  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2846  return false;
2847  TCChain = Copy->getOperand(0);
2848  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2849  SDNode *VMov = Copy;
2850  // f64 returned in a pair of GPRs.
2851  SmallPtrSet<SDNode*, 2> Copies;
2852  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2853  UI != UE; ++UI) {
2854  if (UI->getOpcode() != ISD::CopyToReg)
2855  return false;
2856  Copies.insert(*UI);
2857  }
2858  if (Copies.size() > 2)
2859  return false;
2860 
2861  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2862  UI != UE; ++UI) {
2863  SDValue UseChain = UI->getOperand(0);
2864  if (Copies.count(UseChain.getNode()))
2865  // Second CopyToReg
2866  Copy = *UI;
2867  else {
2868  // We are at the top of this chain.
2869  // If the copy has a glue operand, we conservatively assume it
2870  // isn't safe to perform a tail call.
2871  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2872  return false;
2873  // First CopyToReg
2874  TCChain = UseChain;
2875  }
2876  }
2877  } else if (Copy->getOpcode() == ISD::BITCAST) {
2878  // f32 returned in a single GPR.
2879  if (!Copy->hasOneUse())
2880  return false;
2881  Copy = *Copy->use_begin();
2882  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2883  return false;
2884  // If the copy has a glue operand, we conservatively assume it isn't safe to
2885  // perform a tail call.
2886  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2887  return false;
2888  TCChain = Copy->getOperand(0);
2889  } else {
2890  return false;
2891  }
2892 
2893  bool HasRet = false;
2894  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2895  UI != UE; ++UI) {
2896  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2897  UI->getOpcode() != ARMISD::INTRET_FLAG)
2898  return false;
2899  HasRet = true;
2900  }
2901 
2902  if (!HasRet)
2903  return false;
2904 
2905  Chain = TCChain;
2906  return true;
2907 }
2908 
2909 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2910  if (!Subtarget->supportsTailCall())
2911  return false;
2912 
2913  auto Attr =
2914  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2915  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2916  return false;
2917 
2918  return true;
2919 }
2920 
2921 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2922 // values first, and pass the low and high parts through.
2923 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2924  SDLoc DL(Op);
2925  SDValue WriteValue = Op->getOperand(2);
2926 
2927  // This function is only supposed to be called for i64 type argument.
2928  assert(WriteValue.getValueType() == MVT::i64
2929  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2930 
2931  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2932  DAG.getConstant(0, DL, MVT::i32));
2933  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2934  DAG.getConstant(1, DL, MVT::i32));
2935  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2936  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2937 }
2938 
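// For illustration: when the IR writes an i64 value with the
// llvm.write_register intrinsic, the node arrives here with a single 64-bit
// operand; the lowering re-emits ISD::WRITE_REGISTER with explicit Lo and Hi
// i32 operands so the 32-bit target can match the write.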
2939 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2940 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2941 // one of the above mentioned nodes. It has to be wrapped because otherwise
2942 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2943 // be used to form addressing modes. These wrapped nodes will be selected
2944 // into MOVi.
2945 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2946  SelectionDAG &DAG) const {
2947  EVT PtrVT = Op.getValueType();
2948  // FIXME there is no actual debug info here
2949  SDLoc dl(Op);
2950  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2951  SDValue Res;
2952 
2953  // When generating execute-only code Constant Pools must be promoted to the
2954  // global data section. It's a bit ugly that we can't share them across basic
2955  // blocks, but this way we guarantee that execute-only behaves correctly with
2956  // position-independent addressing modes.
2957  if (Subtarget->genExecuteOnly()) {
2958  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2959  auto T = const_cast<Type*>(CP->getType());
2960  auto C = const_cast<Constant*>(CP->getConstVal());
2961  auto M = const_cast<Module*>(DAG.getMachineFunction().
2962  getFunction().getParent());
2963  auto GV = new GlobalVariable(
2964  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
2965  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2966  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2967  Twine(AFI->createPICLabelUId())
2968  );
2969  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2970  dl, PtrVT);
2971  return LowerGlobalAddress(GA, DAG);
2972  }
2973 
2974  if (CP->isMachineConstantPoolEntry())
2975  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2976  CP->getAlignment());
2977  else
2978  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2979  CP->getAlignment());
2980  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2981 }
2982 
2983 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2984  return MachineJumpTableInfo::EK_Inline;
2985 }
2986 
2987 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2988  SelectionDAG &DAG) const {
2989  MachineFunction &MF = DAG.getMachineFunction();
2990  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2991  unsigned ARMPCLabelIndex = 0;
2992  SDLoc DL(Op);
2993  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2994  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2995  SDValue CPAddr;
2996  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2997  if (!IsPositionIndependent) {
2998  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2999  } else {
3000  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3001  ARMPCLabelIndex = AFI->createPICLabelUId();
3002  ARMConstantPoolValue *CPV =
3003  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3004  ARMCP::CPBlockAddress, PCAdj);
3005  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3006  }
3007  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3008  SDValue Result = DAG.getLoad(
3009  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3011  if (!IsPositionIndependent)
3012  return Result;
3013  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3014  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3015 }
3016 
3017 /// Convert a TLS address reference into the correct sequence of loads
3018 /// and calls to compute the variable's address for Darwin, and return an
3019 /// SDValue containing the final node.
3020 
3021 /// Darwin only has one TLS scheme which must be capable of dealing with the
3022 /// fully general situation, in the worst case. This means:
3023 /// + "extern __thread" declaration.
3024 /// + Defined in a possibly unknown dynamic library.
3025 ///
3026 /// The general system is that each __thread variable has a [3 x i32] descriptor
3027 /// which contains information used by the runtime to calculate the address. The
3028 /// only part of this the compiler needs to know about is the first word, which
3029 /// contains a function pointer that must be called with the address of the
3030 /// entire descriptor in "r0".
3031 ///
3032 /// Since this descriptor may be in a different unit, in general access must
3033 /// proceed along the usual ARM rules. A common sequence to produce is:
3034 ///
3035 /// movw rT1, :lower16:_var$non_lazy_ptr
3036 /// movt rT1, :upper16:_var$non_lazy_ptr
3037 /// ldr r0, [rT1]
3038 /// ldr rT2, [r0]
3039 /// blx rT2
3040 /// [...address now in r0...]
3041 SDValue
3042 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3043  SelectionDAG &DAG) const {
3044  assert(Subtarget->isTargetDarwin() &&
3045  "This function expects a Darwin target");
3046  SDLoc DL(Op);
3047 
3048  // The first step is to get the address of the actual global symbol. This is where
3049  // the TLS descriptor lives.
3050  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3051 
3052  // The first entry in the descriptor is a function pointer that we must call
3053  // to obtain the address of the variable.
3054  SDValue Chain = DAG.getEntryNode();
3055  SDValue FuncTLVGet = DAG.getLoad(
3056  MVT::i32, DL, Chain, DescAddr,
3058  /* Alignment = */ 4,
3061  Chain = FuncTLVGet.getValue(1);
3062 
3063  MachineFunction &F = DAG.getMachineFunction();
3064  MachineFrameInfo &MFI = F.getFrameInfo();
3065  MFI.setAdjustsStack(true);
3066 
3067  // TLS calls preserve all registers except those that absolutely must be
3068  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3069  // silly).
3070  auto TRI =
3071  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3072  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3073  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3074 
3075  // Finally, we can make the call. This is just a degenerate version of a
3076  // normal AArch64 call node: r0 takes the address of the descriptor, and
3077  // returns the address of the variable in this thread.
3078  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3079  Chain =
3080  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3081  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3082  DAG.getRegisterMask(Mask), Chain.getValue(1));
3083  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3084 }
3085 
3086 SDValue
3087 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3088  SelectionDAG &DAG) const {
3089  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3090 
3091  SDValue Chain = DAG.getEntryNode();
3092  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3093  SDLoc DL(Op);
3094 
3095  // Load the current TEB (thread environment block)
3096  SDValue Ops[] = {Chain,
3097  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3098  DAG.getConstant(15, DL, MVT::i32),
3099  DAG.getConstant(0, DL, MVT::i32),
3100  DAG.getConstant(13, DL, MVT::i32),
3101  DAG.getConstant(0, DL, MVT::i32),
3102  DAG.getConstant(2, DL, MVT::i32)};
3103  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3104  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3105 
3106  SDValue TEB = CurrentTEB.getValue(0);
3107  Chain = CurrentTEB.getValue(1);
3108 
3109  // Load the ThreadLocalStoragePointer from the TEB
3110  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3111  SDValue TLSArray =
3112  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3113  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3114 
3115  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3116  // offset into the TLSArray.
3117 
3118  // Load the TLS index from the C runtime
3119  SDValue TLSIndex =
3120  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3121  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3122  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3123 
3124  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3125  DAG.getConstant(2, DL, MVT::i32));
3126  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3127  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3128  MachinePointerInfo());
3129 
3130  // Get the offset of the start of the .tls section (section base)
3131  const auto *GA = cast<GlobalAddressSDNode>(Op);
3132  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3133  SDValue Offset = DAG.getLoad(
3134  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3135  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
3137 
3138  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3139 }
3140 
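// For illustration: the Windows sequence above computes, in effect,
//   TEB        = CP15 c13 thread register (read with the arm_mrc intrinsic)
//   TlsBase    = *(*(TEB + 0x2c) + *_tls_index * 4)
//   VarAddress = TlsBase + SECREL32(global)
// where the SECREL offset is materialized through a constant-pool entry.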
3141 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3142 SDValue
3143 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3144  SelectionDAG &DAG) const {
3145  SDLoc dl(GA);
3146  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3147  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3148  MachineFunction &MF = DAG.getMachineFunction();
3149  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3150  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3151  ARMConstantPoolValue *CPV =
3152  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3153  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3154  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3155  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3156  Argument = DAG.getLoad(
3157  PtrVT, dl, DAG.getEntryNode(), Argument,
3159  SDValue Chain = Argument.getValue(1);
3160 
3161  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3162  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3163 
3164  // call __tls_get_addr.
3165  ArgListTy Args;
3166  ArgListEntry Entry;
3167  Entry.Node = Argument;
3168  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3169  Args.push_back(Entry);
3170 
3171  // FIXME: is there useful debug info available here?
3172  TargetLowering::CallLoweringInfo CLI(DAG);
3173  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3174  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3175  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3176 
3177  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3178  return CallResult.first;
3179 }
3180 
3181 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3182 // "local exec" model.
3183 SDValue
3184 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3185  SelectionDAG &DAG,
3186  TLSModel::Model model) const {
3187  const GlobalValue *GV = GA->getGlobal();
3188  SDLoc dl(GA);
3189  SDValue Offset;
3190  SDValue Chain = DAG.getEntryNode();
3191  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3192  // Get the Thread Pointer
3193  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3194 
3195  if (model == TLSModel::InitialExec) {
3196  MachineFunction &MF = DAG.getMachineFunction();
3197  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3198  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3199  // Initial exec model.
3200  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3201  ARMConstantPoolValue *CPV =
3202  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3203  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3204  true);
3205  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3206  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3207  Offset = DAG.getLoad(
3208  PtrVT, dl, Chain, Offset,
3210  Chain = Offset.getValue(1);
3211 
3212  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3213  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3214 
3215  Offset = DAG.getLoad(
3216  PtrVT, dl, Chain, Offset,
3218  } else {
3219  // local exec model
3220  assert(model == TLSModel::LocalExec);
3221  ARMConstantPoolValue *CPV =
3222  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3223  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3224  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3225  Offset = DAG.getLoad(
3226  PtrVT, dl, Chain, Offset,
3228  }
3229 
3230  // The address of the thread local variable is the add of the thread
3231  // pointer with the offset of the variable.
3232  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3233 }
3234 
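// For illustration: under initial-exec the variable's offset is loaded from a
// GOTTPOFF constant-pool entry (one extra load), while under local-exec the
// TPOFF offset is a link-time constant; in both cases the final address is
// thread-pointer + offset, matching the ADD emitted above.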
3235 SDValue
3236 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3237  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3238  if (DAG.getTarget().useEmulatedTLS())
3239  return LowerToTLSEmulatedModel(GA, DAG);
3240 
3241  if (Subtarget->isTargetDarwin())
3242  return LowerGlobalTLSAddressDarwin(Op, DAG);
3243 
3244  if (Subtarget->isTargetWindows())
3245  return LowerGlobalTLSAddressWindows(Op, DAG);
3246 
3247  // TODO: implement the "local dynamic" model
3248  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3249  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3250 
3251  switch (model) {
3252  case TLSModel::GeneralDynamic:
3253  case TLSModel::LocalDynamic:
3254  return LowerToTLSGeneralDynamicModel(GA, DAG);
3255  case TLSModel::InitialExec:
3256  case TLSModel::LocalExec:
3257  return LowerToTLSExecModels(GA, DAG, model);
3258  }
3259  llvm_unreachable("bogus TLS model");
3260 }
3261 
3262 /// Return true if all users of V are within function F, looking through
3263 /// ConstantExprs.
3264 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3265  SmallVector<const User*,4> Worklist;
3266  for (auto *U : V->users())
3267  Worklist.push_back(U);
3268  while (!Worklist.empty()) {
3269  auto *U = Worklist.pop_back_val();
3270  if (isa<ConstantExpr>(U)) {
3271  for (auto *UU : U->users())
3272  Worklist.push_back(UU);
3273  continue;
3274  }
3275 
3276  auto *I = dyn_cast<Instruction>(U);
3277  if (!I || I->getParent()->getParent() != F)
3278  return false;
3279  }
3280  return true;
3281 }
3282 
3283 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3284  const GlobalValue *GV, SelectionDAG &DAG,
3285  EVT PtrVT, const SDLoc &dl) {
3286  // If we're creating a pool entry for a constant global with unnamed address,
3287  // and the global is small enough, we can emit it inline into the constant pool
3288  // to save ourselves an indirection.
3289  //
3290  // This is a win if the constant is only used in one function (so it doesn't
3291  // need to be duplicated) or duplicating the constant wouldn't increase code
3292  // size (implying the constant is no larger than 4 bytes).
3293  const Function &F = DAG.getMachineFunction().getFunction();
3294 
3295  // We rely on this decision to inline being idempotent and unrelated to the
3296  // use-site. We know that if we inline a variable at one use site, we'll
3297  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3298  // doesn't know about this optimization, so bail out if it's enabled; else
3299  // we could decide to inline here (and thus never emit the GV) while
3300  // fast-isel-generated code still requires the GV.
3301  if (!EnableConstpoolPromotion ||
3302  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3303  return SDValue();
3304 
3305  auto *GVar = dyn_cast<GlobalVariable>(GV);
3306  if (!GVar || !GVar->hasInitializer() ||
3307  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3308  !GVar->hasLocalLinkage())
3309  return SDValue();
3310 
3311  // If we inline a value that contains relocations, we move the relocations
3312  // from .data to .text. This is not allowed in position-independent code.
3313  auto *Init = GVar->getInitializer();
3314  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3315  Init->needsRelocation())
3316  return SDValue();
3317 
3318  // The constant islands pass can only really deal with alignment requests
3319  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3320  // any type wanting greater alignment requirements than 4 bytes. We also
3321  // can only promote constants that are multiples of 4 bytes in size or
3322  // are paddable to a multiple of 4. Currently we only try to pad constants
3323  // that are strings for simplicity.
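 // For example, a 6-byte string initializer has Size % 4 == 2, so
 // RequiredPadding == 2 and it is padded with two NUL bytes below; an 8-byte
 // initializer yields RequiredPadding == 4, which means no padding is needed.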
3324  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3325  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3326  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3327  unsigned RequiredPadding = 4 - (Size % 4);
3328  bool PaddingPossible =
3329  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3330  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3331  Size == 0)
3332  return SDValue();
3333 
3334  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3335  MachineFunction &MF = DAG.getMachineFunction();
3336  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3337 
3338  // We can't bloat the constant pool too much, else the ConstantIslands pass
3339  // may fail to converge. If we haven't promoted this global yet (it may have
3340  // multiple uses), and promoting it would increase the constant pool size (Sz
3341  // > 4), ensure we have space to do so up to MaxTotal.
3342  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3343  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3344  ConstpoolPromotionMaxTotal)
3345  return SDValue();
3346 
3347  // This is only valid if all users are in a single function; we can't clone
3348  // the constant in general. The LLVM IR unnamed_addr allows merging
3349  // constants, but not cloning them.
3350  //
3351  // We could potentially allow cloning if we could prove all uses of the
3352  // constant in the current function don't care about the address, like
3353  // printf format strings. But that isn't implemented for now.
3354  if (!allUsersAreInFunction(GVar, &F))
3355  return SDValue();
3356 
3357  // We're going to inline this global. Pad it out if needed.
3358  if (RequiredPadding != 4) {
3359  StringRef S = CDAInit->getAsString();
3360 
3361  SmallVector<uint8_t,16> V(S.size());
3362  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3363  while (RequiredPadding--)
3364  V.push_back(0);
3365  Init = ConstantDataArray::get(*DAG.getContext(), V);
3366  }
3367 
3368  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3369  SDValue CPAddr =
3370  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3371  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3372  AFI->markGlobalAsPromotedToConstantPool(GVar);
3373  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3374  PaddedSize - 4);
3375  }
3376  ++NumConstpoolPromoted;
3377  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3378 }
3379 
3380 static bool isReadOnly(const GlobalValue *GV) {
3381  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3382  if (!(GV = GA->getBaseObject()))
3383  return false;
3384  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3385  return V->isConstant();
3386  return isa<Function>(GV);
3387 }
3388 
3389 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3390  SelectionDAG &DAG) const {
3391  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3392  default: llvm_unreachable("unknown object format");
3393  case Triple::COFF:
3394  return LowerGlobalAddressWindows(Op, DAG);
3395  case Triple::ELF:
3396  return LowerGlobalAddressELF(Op, DAG);
3397  case Triple::MachO:
3398  return LowerGlobalAddressDarwin(Op, DAG);
3399  }
3400 }
3401 
3402 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3403  SelectionDAG &DAG) const {
3404  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3405  SDLoc dl(Op);
3406  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3407  const TargetMachine &TM = getTargetMachine();
3408  bool IsRO = isReadOnly(GV);
3409 
3410  // Call promoteToConstantPool only if not generating an XO (execute-only) text section
3411  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3412  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3413  return V;
3414 
3415  if (isPositionIndependent()) {
3416  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
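 // If the global may be preempted (not DSO-local), go through the GOT: form
 // the GOT-slot address PC-relatively (MO_GOT) and load the real address
 // from that slot below.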
3417  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3418  UseGOT_PREL ? ARMII::MO_GOT : 0);
3419  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3420  if (UseGOT_PREL)
3421  Result =
3422  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3423  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3424  return Result;
3425  } else if (Subtarget->isROPI() && IsRO) {
3426  // PC-relative.
3427  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3428  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3429  return Result;
3430  } else if (Subtarget->isRWPI() && !IsRO) {
3431  // SB-relative.
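 // RWPI addresses read-write data relative to the static base register (R9):
 // compute the SB-relative offset, then add it to R9 below.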
3432  SDValue RelAddr;
3433  if (Subtarget->useMovt()) {
3434  ++NumMovwMovt;
3435  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3436  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3437  } else { // use literal pool for address constant
3438  ARMConstantPoolValue *CPV =
3439  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3440  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3441  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3442  RelAddr = DAG.getLoad(
3443  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3444  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3445  }
3446  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3447  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3448  return Result;
3449  }
3450 
3451  // If we have T2 ops, we can materialize the address directly via movt/movw
3452  // pair. This is always cheaper.
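 // e.g. movw r0, :lower16:sym
 //      movt r0, :upper16:sym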
3453  if (Subtarget->useMovt()) {
3454  ++NumMovwMovt;
3455  // FIXME: Once remat is capable of dealing with instructions with register
3456  // operands, expand this into two nodes.
3457  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3458  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3459  } else {
3460  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3461  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3462  return DAG.getLoad(
3463  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3464  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3465  }
3466 }
3467 
3468 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3469  SelectionDAG &DAG) const {
3470  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3471  "ROPI/RWPI not currently supported for Darwin");
3472  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3473  SDLoc dl(Op);
3474  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3475 
3476  if (Subtarget->useMovt())
3477  ++NumMovwMovt;
3478 
3479  // FIXME: Once remat is capable of dealing with instructions with register
3480  // operands, expand this into multiple nodes
3481  unsigned Wrapper =
3482  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3483 
3484  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3485  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3486 
3487  if (Subtarget->isGVIndirectSymbol(GV))
3488  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3489  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3490  return Result;
3491 }
3492 
3493 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3494  SelectionDAG &DAG) const {
3495  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3496  assert(Subtarget->useMovt() &&
3497  "Windows on ARM expects to use movw/movt");
3498  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3499  "ROPI/RWPI not currently supported for Windows");
3500 
3501  const TargetMachine &TM = getTargetMachine();
3502  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3503  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3504  if (GV->hasDLLImportStorageClass())
3505  TargetFlags = ARMII::MO_DLLIMPORT;
3506  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3507  TargetFlags = ARMII::MO_COFFSTUB;
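 // For dllimport globals (and non-DSO-local globals reached via a COFF stub)
 // the movw/movt pair materializes the address of a pointer slot, which is
 // then loaded below to obtain the real address.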
3508  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3509  SDValue Result;
3510  SDLoc DL(Op);
3511 
3512  ++NumMovwMovt;
3513 
3514  // FIXME: Once remat is capable of dealing with instructions with register
3515  // operands, expand this into two nodes.
3516  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3517  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3518  TargetFlags));
3519  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3520  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3521  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3522  return Result;
3523 }
3524 
3525 SDValue
3526 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3527  SDLoc dl(Op);
3528  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3529  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3530  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3531  Op.getOperand(1), Val);
3532 }
3533 
3534 SDValue
3535 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3536  SDLoc dl(Op);
3537  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3538  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3539 }
3540 
3541 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3542  SelectionDAG &DAG) const {
3543  SDLoc dl(Op);
3544  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3545  Op.getOperand(0));
3546 }
3547 
3548 SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3549  SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
3550  unsigned IntNo =
3551  cast<ConstantSDNode>(
3552  Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
3553  ->getZExtValue();
3554  switch (IntNo) {
3555  default:
3556  return SDValue(); // Don't custom lower most intrinsics.
3557  case Intrinsic::arm_gnu_eabi_mcount: {
3558  MachineFunction &MF = DAG.getMachineFunction();
3559  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3560  SDLoc dl(Op);
3561  SDValue Chain = Op.getOperand(0);
3562  // call "\01__gnu_mcount_nc"
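 // __gnu_mcount_nc expects the caller's return address on top of the stack,
 // so the BL_PUSHLR / tBL_PUSHLR pseudos push the live-in LR before the call.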
3563  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3564  const uint32_t *Mask =
3565  ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3566  assert(Mask && "Missing call preserved mask for calling convention");
3567  // Mark LR an implicit live-in.
3568  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3569  SDValue ReturnAddress =
3570  DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3571  std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
3572  SDValue Callee =
3573  DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
3574  SDValue RegisterMask = DAG.getRegisterMask(Mask);
3575  if (Subtarget->isThumb())
3576  return SDValue(
3577  DAG.getMachineNode(
3578  ARM::tBL_PUSHLR, dl, ResultTys,
3579  {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3580  DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3581  0);
3582  return SDValue(
3583  DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
3584  {ReturnAddress, Callee, RegisterMask, Chain}),
3585  0);
3586  }
3587  }
3588 }
3589 
3590 SDValue
3591 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3592  const ARMSubtarget *Subtarget) const {
3593  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3594  SDLoc dl(Op);
3595  switch (IntNo) {
3596  default: return SDValue(); // Don't custom lower most intrinsics.
3597  case Intrinsic::thread_pointer: {
3598  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3599  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3600  }
3601  case Intrinsic::eh_sjlj_lsda: {
3602  MachineFunction &MF = DAG.getMachineFunction();
3603  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3604  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3605  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3606  SDValue CPAddr;
3607  bool IsPositionIndependent = isPositionIndependent();
3608  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3609  ARMConstantPoolValue *CPV =
3610  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3611  ARMCP::CPLSDA, PCAdj);
3612  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3613  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3614  SDValue Result = DAG.getLoad(
3615  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3616  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3617 
3618  if (IsPositionIndependent) {
3619  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3620  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3621  }
3622  return Result;
3623  }
3624  case Intrinsic::arm_neon_vabs:
3625  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3626  Op.getOperand(1));
3627  case Intrinsic::arm_neon_vmulls:
3628  case Intrinsic::arm_neon_vmullu: {
3629  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3630  ? ARMISD::VMULLs : ARMISD::VMULLu;
3631  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3632  Op.getOperand(1), Op.getOperand(2));
3633  }
3634  case Intrinsic::arm_neon_vminnm:
3635  case Intrinsic::arm_neon_vmaxnm: {
3636  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3637  ? ISD::FMINNUM : ISD::FMAXNUM;
3638  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3639  Op.getOperand(1), Op.getOperand(2));
3640  }
3641  case Intrinsic::arm_neon_vminu:
3642  case Intrinsic::arm_neon_vmaxu: {
3643  if (Op.getValueType().isFloatingPoint())
3644  return SDValue();
3645  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3646  ? ISD::UMIN : ISD::UMAX;
3647  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3648  Op.getOperand(1), Op.getOperand(2));
3649  }
3650  case Intrinsic::arm_neon_vmins:
3651  case Intrinsic::arm_neon_vmaxs: {
3652  // v{min,max}s is overloaded between signed integers and floats.
3653  if (!Op.getValueType().isFloatingPoint()) {
3654  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3655  ? ISD::SMIN : ISD::SMAX;
3656  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3657  Op.getOperand(1), Op.getOperand(2));
3658  }
3659  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3660  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3661  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3662  Op.getOperand(1), Op.getOperand(2));
3663  }
3664  case Intrinsic::arm_neon_vtbl1:
3665  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3666  Op.getOperand(1), Op.getOperand(2));
3667  case Intrinsic::arm_neon_vtbl2:
3668  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3669  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3670  }
3671 }
3672 
3673 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3674  const ARMSubtarget *Subtarget) {
3675  SDLoc dl(Op);
3676  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3677  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
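 // A fence with single-thread scope only has to order against other code on
 // the same thread (e.g. signal handlers), so no DMB instruction is needed.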
3678  if (SSID == SyncScope::SingleThread)
3679  return Op;
3680 
3681  if (!Subtarget->hasDataBarrier()) {
3682  // Some ARMv6 cpus can support data barriers with an mcr instruction.
3683  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3684  // here.
3685  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3686  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3687  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3688  DAG.getConstant(0, dl, MVT::i32));
3689  }
3690 
3691  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3692  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3693  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3694  if (Subtarget->isMClass()) {
3695  // Only a full system barrier exists in the M-class architectures.
3696  Domain = ARM_MB::SY;
3697  } else if (Subtarget->preferISHSTBarriers() &&
3698  Ord == AtomicOrdering::Release) {
3699  // Swift happens to implement ISHST barriers in a way that's compatible with
3700  // Release semantics but weaker than ISH so we'd be fools not to use
3701  // it. Beware: other processors probably don't!
3702  Domain = ARM_MB::ISHST;
3703  }
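 // Emit the fence as a DMB in the chosen domain, e.g. "dmb ish" on A-class
 // cores or "dmb sy" on M-class cores.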
3704 
3705  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3706  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3707  DAG.getConstant(Domain, dl, MVT::i32));
3708 }
3709 
3710 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3711  const ARMSubtarget *Subtarget) {
3712  // ARM pre v5TE and Thumb1 do not have preload instructions.
3713  if (!(Subtarget->isThumb2() ||
3714  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3715  // Just preserve the chain.
3716  return Op.getOperand(0);
3717 
3718  SDLoc dl(Op);
3719  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3720  if (!isRead &&
3721  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3722  // ARMv7 with MP extension has PLDW.
3723  return Op.getOperand(0);
3724 
3725  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3726  if (Subtarget->isThumb()) {
3727  // Invert the bits.
3728  isRead = ~isRead & 1;
3729  isData = ~isData & 1;
3730  }
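 // The PRELOAD node is selected to PLD for data reads, PLDW for data writes
 // (ARMv7 MP extension) and PLI for instruction prefetches.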
3731 
3732  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3733  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3734  DAG.getConstant(isData, dl, MVT::i32));
3735 }
3736 
3737 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3738  MachineFunction &MF = DAG.getMachineFunction();
3739  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3740 
3741  // vastart just stores the address of the VarArgsFrameIndex slot into the
3742  // memory location argument.
3743  SDLoc dl(Op);
3744  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3745  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3746  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3747  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3748  MachinePointerInfo(SV));
3749 }
3750 
3751 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3752  CCValAssign &NextVA,
3753  SDValue &Root,
3754  SelectionDAG &DAG,
3755  const SDLoc &dl) const {
3756  MachineFunction &MF = DAG.getMachineFunction();
3757  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3758 
3759  const TargetRegisterClass *RC;
3760  if (AFI->isThumb1OnlyFunction())
3761  RC = &ARM::tGPRRegClass;
3762  else
3763  RC = &ARM::GPRRegClass;
3764 
3765  // Transform the arguments stored in physical registers into virtual ones.
3766  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3767  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3768 
3769  SDValue ArgValue2;
3770  if (NextVA.isMemLoc()) {
3771  MachineFrameInfo &MFI = MF.getFrameInfo();
3772  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3773 
3774  // Create load node to retrieve arguments from the stack.
3775  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3776  ArgValue2 = DAG.getLoad(
3777  MVT::i32, dl, Root, FIN,
3778  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3779  } else {
3780  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3781  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3782  }
3783  if (!Subtarget->isLittle())
3784  std::swap (ArgValue, ArgValue2);
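 // Reassemble the f64 from its two 32-bit halves; on big-endian targets the
 // halves arrive swapped, hence the swap above.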
3785  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3786 }
3787 
3788 // The remaining GPRs hold either the beginning of variable-argument
3789 // data, or the beginning of an aggregate passed by value (usually
3790 // byval). Either way, we allocate stack slots adjacent to the data
3791 // provided by our caller, and store the unallocated registers there.
3792 // If this is a variadic function, the va_list pointer will begin with
3793 // these values; otherwise, this reassembles a (byval) structure that
3794 // was split between registers and memory.
3795 // Return: the frame index that the registers were stored into.
3796 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3797  const SDLoc &dl, SDValue &Chain,
3798  const Value *OrigArg,
3799  unsigned InRegsParamRecordIdx,
3800  int ArgOffset, unsigned ArgSize) const {
3801  // Currently, two use cases are possible:
3802  // Case #1. Non-varargs function, and we meet the first byval parameter.
3803  //          Set up the first unallocated register as the first byval register
3804  //          and consume all remaining registers
3805  //          (these two actions are performed by the HandleByVal method).
3806  //          Then, here, we initialize the stack frame with
3807  //          "store-reg" instructions.
3808  // Case #2. Varargs function that doesn't contain byval parameters.
3809  //          The same: consume all remaining unallocated registers and
3810  //          initialize the stack frame.
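 // For example, if the first eight bytes of a byval aggregate were assigned
 // to r2 and r3, RBegin is R2, REnd is R4 and ArgOffset becomes -8, so the
 // two registers are stored immediately below the part of the aggregate that
 // arrived on the stack.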
3811 
3812  MachineFunction &MF = DAG.getMachineFunction();
3813  MachineFrameInfo &MFI = MF.getFrameInfo();
3814  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3815  unsigned RBegin, REnd;
3816  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3817  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3818  } else {
3819  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3820  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3821  REnd = ARM::R4;
3822  }
3823 
3824  if (REnd != RBegin)
3825  ArgOffset = -4 * (ARM::R4 - RBegin);
3826 
3827  auto PtrVT = getPointerTy(DAG.getDataLayout());
3828  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3829  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3830 
3831  SmallVector<SDValue, 4> MemOps;
3832  const TargetRegisterClass *RC =
3833  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3834 
3835  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3836  unsigned VReg = MF.addLiveIn(Reg, RC);
3837  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3838  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3839  MachinePointerInfo(OrigArg, 4 * i));
3840  MemOps.push_back(Store);
3841  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3842  }
3843 
3844  if (!MemOps.empty())
3845  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3846  return FrameIndex;
3847 }
3848 
3849 // Set up the stack frame that the va_list pointer will start from.
3850 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3851  const SDLoc &dl, SDValue &Chain,
3852  unsigned ArgOffset,
3853  unsigned TotalArgRegsSaveSize,
3854  bool ForceMutable) const {
3855  MachineFunction &MF = DAG.getMachineFunction();
3856  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3857 
3858  // Try to store any remaining integer argument regs
3859  // to their spots on the stack so that they may be loaded by dereferencing
3860  // the result of va_next.
3861  // If there are no regs to be stored, just point the address past the last
3862  // argument passed via the stack.
3863  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3864  CCInfo.getInRegsParamsCount(),
3865  CCInfo.getNextStackOffset(),
3866  std::max(4U, TotalArgRegsSaveSize));
3867  AFI->setVarArgsFrameIndex(FrameIndex);
3868 }
3869 
3870 SDValue ARMTargetLowering::LowerFormalArguments(
3871  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3872  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3873  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3874  MachineFunction &MF = DAG.getMachineFunction();
3875  MachineFrameInfo &MFI = MF.getFrameInfo();
3876 
3877  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3878 
3879  // Assign locations to all of the incoming arguments.
3880  SmallVector<CCValAssign, 16> ArgLocs;
3881  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3882  *DAG.getContext());
3883  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3884 
3885  SmallVector<SDValue, 16> ArgValues;
3886  SDValue ArgValue;
3887  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3888  unsigned CurArgIdx = 0;
3889 
3890  // Initially ArgRegsSaveSize is zero.
3891  // Then we increase this value each time we meet byval parameter.
3892  // We also increase this value in case of varargs function.
3893  AFI->setArgRegsSaveSize(0);
3894 
3895  // Calculate the amount of stack space that we need to allocate to store
3896  // byval and variadic arguments that are passed in registers.
3897  // We need to know this before we allocate the first byval or variadic
3898  // argument, as they will be allocated a stack slot below the CFA (Canonical
3899  // Frame Address, the stack pointer at entry to the function).
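 // For example, if the lowest such register is r2, ArgRegBegin is R2 and
 // TotalArgRegsSaveSize is 4 * (R4 - R2) = 8 bytes.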
3900  unsigned ArgRegBegin = ARM::R4;
3901  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3902  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3903  break;
3904 
3905  CCValAssign &VA = ArgLocs[i];
3906  unsigned Index = VA.getValNo();
3907  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3908  if (!Flags.isByVal())
3909  continue;
3910 
3911  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3912  unsigned RBegin, REnd;
3913  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3914  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3915 
3916  CCInfo.nextInRegsParam();
3917  }
3918  CCInfo.rewindByValRegsInfo();
3919 
3920  int lastInsIndex = -1;
3921  if (isVarArg && MFI.hasVAStart()) {
3922  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3923  if (RegIdx != array_lengthof(GPRArgRegs))
3924  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3925  }
3926 
3927  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3928  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3929  auto PtrVT = getPointerTy(DAG.getDataLayout());
3930 
3931  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3932  CCValAssign &VA = ArgLocs[i];
3933  if (Ins[VA.getValNo()].isOrigArg()) {
3934  std::advance(CurOrigArg,
3935  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3936  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3937  }
3938  // Arguments stored in registers.
3939  if (VA.isRegLoc()) {
3940  EVT RegVT = VA.getLocVT();
3941 
3942  if (VA.needsCustom()) {
3943  // f64 and vector types are split up into multiple registers or
3944  // combinations of registers and stack slots.
3945  if (VA.getLocVT() == MVT::v2f64) {
3946  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3947  Chain, DAG, dl);
3948  VA = ArgLocs[++i]; // skip ahead to next loc
3949  SDValue ArgValue2;
3950  if (VA.isMemLoc()) {
3951  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3952  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3953  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3954  MachinePointerInfo::getFixedStack(
3955  DAG.getMachineFunction(), FI));
3956  } else {
3957  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3958  Chain, DAG, dl);
3959  }
3960  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3961  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3962  ArgValue, ArgValue1,
3963  DAG.getIntPtrConstant(0, dl));
3964  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3965  ArgValue, ArgValue2,
3966  DAG.getIntPtrConstant(1, dl));
3967  } else
3968  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3969  } else {
3970  const TargetRegisterClass *RC;
3971 
3972 
3973  if (RegVT == MVT::f16)
3974  RC = &ARM::HPRRegClass;
3975  else if (RegVT == MVT::f32)
3976  RC = &ARM::SPRRegClass;
3977  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3978  RC = &ARM::DPRRegClass;
3979  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3980  RC = &ARM::QPRRegClass;
3981  else if (RegVT == MVT::i32)
3982  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3983  : &ARM::GPRRegClass;
3984  else
3985  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3986 
3987  // Transform the arguments in physical registers into virtual ones.
3988  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3989  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3990 
3991  // If this value is passed in r0 and has the returned attribute (e.g.
3992  // C++ 'structors), record this fact for later use.
3993  if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
3994  AFI->setPreservesR0();
3995  }
3996  }
3997 
3998  // If this is an 8 or 16-bit value, it is really passed promoted
3999  // to 32 bits. Insert an assert[sz]ext to capture this, then
4000  // truncate to the right size.
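 // e.g. an i8 argument arrives in the low bits of an i32 register; an
 // AssertZext marks the upper 24 bits as known zero before truncating back
 // to i8.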
4001  switch (VA.getLocInfo()) {
4002  default: llvm_unreachable("Unknown loc info!");
4003  case CCValAssign::Full: break;
4004  case CCValAssign::BCvt:
4005  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4006  break;
4007  case CCValAssign::SExt:
4008  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4009  DAG.getValueType(VA.getValVT()));
4010  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4011  break;
4012  case CCValAssign::ZExt:
4013  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4014  DAG.getValueType(VA.getValVT()));
4015  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4016  break;
4017  }
4018 
4019  InVals.push_back(ArgValue);
4020  } else { // VA.isRegLoc()
4021  // sanity check
4022  assert(VA.isMemLoc());
4023  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4024 
4025  int index = VA.getValNo();
4026 
4027  // Some Ins[] entries become multiple ArgLoc[] entries.
4028  // Process them only once.
4029  if (index != lastInsIndex)
4030  {
4031  ISD::ArgFlagsTy Flags = Ins[index].Flags;
4032  // FIXME: For now, all byval parameter objects are marked mutable.
4033  // This can be changed with more analysis.
4034  // In case of tail call optimization mark all arguments mutable.
4035  // Since they could be overwritten by lowering of arguments in case of
4036  // a tail call.
4037  if (Flags.isByVal()) {
4038  assert(Ins[index].isOrigArg() &&
4039  "Byval arguments cannot be implicit");
4040  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4041 
4042  int FrameIndex = StoreByValRegs(
4043  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4044  VA.getLocMemOffset(), Flags.getByValSize());
4045  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4046  CCInfo.nextInRegsParam();
4047  } else {
4048  unsigned FIOffset = VA.getLocMemOffset();
4049  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4050  FIOffset, true);
4051