ARMISelLowering.cpp (LLVM 10.0.0svn)
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
26 #include "Utils/ARMBaseInfo.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/BitVector.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringExtras.h"
37 #include "llvm/ADT/StringRef.h"
38 #include "llvm/ADT/StringSwitch.h"
39 #include "llvm/ADT/Triple.h"
40 #include "llvm/ADT/Twine.h"
64 #include "llvm/IR/Attributes.h"
65 #include "llvm/IR/CallingConv.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/Constants.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Function.h"
72 #include "llvm/IR/GlobalAlias.h"
73 #include "llvm/IR/GlobalValue.h"
74 #include "llvm/IR/GlobalVariable.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InlineAsm.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/PatternMatch.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/IR/Value.h"
86 #include "llvm/MC/MCInstrDesc.h"
88 #include "llvm/MC/MCRegisterInfo.h"
89 #include "llvm/MC/MCSchedule.h"
92 #include "llvm/Support/Casting.h"
93 #include "llvm/Support/CodeGen.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 using namespace llvm::PatternMatch;
117 
118 #define DEBUG_TYPE "arm-isel"
119 
120 STATISTIC(NumTailCalls, "Number of tail calls");
121 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
122 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
123 STATISTIC(NumConstpoolPromoted,
124  "Number of constants with their storage promoted into constant pools");
125 
126 static cl::opt<bool>
127 ARMInterworking("arm-interworking", cl::Hidden,
128  cl::desc("Enable / disable ARM interworking (for debugging only)"),
129  cl::init(true));
130 
131 static cl::opt<bool> EnableConstpoolPromotion(
132  "arm-promote-constant", cl::Hidden,
133  cl::desc("Enable / disable promotion of unnamed_addr constants into "
134  "constant pools"),
135  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
136 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
137  "arm-promote-constant-max-size", cl::Hidden,
138  cl::desc("Maximum size of constant to promote into a constant pool"),
139  cl::init(64));
140 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
141  "arm-promote-constant-max-total", cl::Hidden,
142  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
143  cl::init(128));
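// A quick usage sketch (hypothetical invocation, not part of this file): since
// these are cl::opt flags, they can be toggled straight from the llc command
// line when experimenting with constant-pool promotion, e.g.
//
//   llc -mtriple=thumbv7m-none-eabi -arm-promote-constant \
//       -arm-promote-constant-max-size=32 -arm-promote-constant-max-total=64 \
//       input.ll -o -
//
// The triple and the input file name above are placeholders; the flag names
// and default values are exactly those declared above.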
144 
145 // The APCS parameter registers.
146 static const MCPhysReg GPRArgRegs[] = {
147  ARM::R0, ARM::R1, ARM::R2, ARM::R3
148 };
149 
150 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
151  MVT PromotedBitwiseVT) {
152  if (VT != PromotedLdStVT) {
153  setOperationAction(ISD::LOAD, VT, Promote);
154  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
155 
156  setOperationAction(ISD::STORE, VT, Promote);
157  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
158  }
159 
160  MVT ElemTy = VT.getVectorElementType();
161  if (ElemTy != MVT::f64)
162  setOperationAction(ISD::SETCC, VT, Custom);
163  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
164  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
165  if (ElemTy == MVT::i32) {
166  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
167  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
168  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
169  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
170  } else {
171  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
172  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
173  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
174  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
175  }
176  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
177  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
178  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
179  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
180  setOperationAction(ISD::SELECT, VT, Expand);
181  setOperationAction(ISD::SELECT_CC, VT, Expand);
182  setOperationAction(ISD::VSELECT, VT, Expand);
183  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
184  if (VT.isInteger()) {
185  setOperationAction(ISD::SHL, VT, Custom);
186  setOperationAction(ISD::SRA, VT, Custom);
187  setOperationAction(ISD::SRL, VT, Custom);
188  }
189 
190  // Promote all bit-wise operations.
191  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  setOperationAction(ISD::AND, VT, Promote);
193  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  setOperationAction(ISD::OR, VT, Promote);
195  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  setOperationAction(ISD::XOR, VT, Promote);
197  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
198  }
199 
200  // Neon does not support vector divide/remainder operations.
201  setOperationAction(ISD::SDIV, VT, Expand);
202  setOperationAction(ISD::UDIV, VT, Expand);
203  setOperationAction(ISD::FDIV, VT, Expand);
204  setOperationAction(ISD::SREM, VT, Expand);
205  setOperationAction(ISD::UREM, VT, Expand);
206  setOperationAction(ISD::FREM, VT, Expand);
207 
208  if (!VT.isFloatingPoint() &&
209  VT != MVT::v2i64 && VT != MVT::v1i64)
210  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211  setOperationAction(Opcode, VT, Legal);
212 }
213 
214 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
215  addRegisterClass(VT, &ARM::DPRRegClass);
216  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
217 }
218 
219 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
220  addRegisterClass(VT, &ARM::DPairRegClass);
221  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
222 }
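// Rough illustration of what the promotion configured above means in practice
// (a sketch, not code from this file): after addDRTypeForNEON(MVT::v8i8),
// loads, stores and bitwise ops on v8i8 carry no patterns of their own; the
// legalizer rewrites them through the promoted types, roughly
//
//   t1: v8i8 = load t0        ->  (v8i8 (bitcast (f64 (load t0))))
//   t2: v8i8 = and t1, t1     ->  (v8i8 (bitcast (v2i32 (and ...))))
//
// so one D-register (f64 / v2i32) pattern covers every 64-bit NEON type, and
// the same idea applies to Q registers via addQRTypeForNEON.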
223 
224 void ARMTargetLowering::setAllExpand(MVT VT) {
225  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
226  setOperationAction(Opc, VT, Expand);
227 
228  // We support these really simple operations even on types where all
229  // the actual arithmetic has to be broken down into simpler
230  // operations or turned into library calls.
231  setOperationAction(ISD::BITCAST, VT, Legal);
232  setOperationAction(ISD::LOAD, VT, Legal);
233  setOperationAction(ISD::STORE, VT, Legal);
234  setOperationAction(ISD::UNDEF, VT, Legal);
235 }
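// Illustrative consequence (a sketch, not from this file): after
// setAllExpand(MVT::f32) on a target with FP registers but no VFP2 base,
//   %r = fadd float %a, %b
// is expanded into a soft-float libcall (__addsf3, or __aeabi_fadd on AEABI
// targets), while plain f32 loads, stores, bitcasts and undefs stay as single
// legal nodes thanks to the re-legalization above.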
236 
237 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
238  LegalizeAction Action) {
239  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
240  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
241  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
242 }
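// Example of the effect (sketch): addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal)
// tells the legalizer that a widening load such as
//   t1: v4i32,ch = load<(load 4 from %p), zext from v4i8> t0, %p
// is acceptable as a single node (and likewise for the sext and anyext
// flavours), so it is not split into a narrow load plus a separate extend.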
243 
244 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
245  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
246 
247  for (auto VT : IntTypes) {
248  addRegisterClass(VT, &ARM::MQPRRegClass);
249  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
250  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
251  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
252  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
253  setOperationAction(ISD::SHL, VT, Custom);
254  setOperationAction(ISD::SRA, VT, Custom);
255  setOperationAction(ISD::SRL, VT, Custom);
256  setOperationAction(ISD::SMIN, VT, Legal);
257  setOperationAction(ISD::SMAX, VT, Legal);
258  setOperationAction(ISD::UMIN, VT, Legal);
259  setOperationAction(ISD::UMAX, VT, Legal);
260  setOperationAction(ISD::ABS, VT, Legal);
261  setOperationAction(ISD::SETCC, VT, Custom);
262  setOperationAction(ISD::MLOAD, VT, Custom);
263  setOperationAction(ISD::MSTORE, VT, Legal);
264  setOperationAction(ISD::CTLZ, VT, Legal);
265  setOperationAction(ISD::CTTZ, VT, Custom);
266  setOperationAction(ISD::BITREVERSE, VT, Legal);
267  setOperationAction(ISD::BSWAP, VT, Legal);
268  setOperationAction(ISD::SADDSAT, VT, Legal);
269  setOperationAction(ISD::UADDSAT, VT, Legal);
270  setOperationAction(ISD::SSUBSAT, VT, Legal);
271  setOperationAction(ISD::USUBSAT, VT, Legal);
272 
273  // No native support for these.
274  setOperationAction(ISD::UDIV, VT, Expand);
275  setOperationAction(ISD::SDIV, VT, Expand);
276  setOperationAction(ISD::UREM, VT, Expand);
277  setOperationAction(ISD::SREM, VT, Expand);
278  setOperationAction(ISD::CTPOP, VT, Expand);
279 
280  // Vector reductions
281  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
282  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
283  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
284  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
285  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
286 
287  if (!HasMVEFP) {
288  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
289  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
290  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
291  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
292  }
293 
294  // Pre and Post inc are supported on loads and stores
295  for (unsigned im = (unsigned)ISD::PRE_INC;
296  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
297  setIndexedLoadAction(im, VT, Legal);
298  setIndexedStoreAction(im, VT, Legal);
299  }
300  }
301 
302  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
303  for (auto VT : FloatTypes) {
304  addRegisterClass(VT, &ARM::MQPRRegClass);
305  if (!HasMVEFP)
306  setAllExpand(VT);
307 
308  // These are legal or custom whether we have MVE.fp or not
309  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
310  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
311  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
312  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
313  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
314  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
315  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
316  setOperationAction(ISD::SETCC, VT, Custom);
317  setOperationAction(ISD::MLOAD, VT, Custom);
318  setOperationAction(ISD::MSTORE, VT, Legal);
319 
320  // Pre and Post inc are supported on loads and stores
321  for (unsigned im = (unsigned)ISD::PRE_INC;
322  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
323  setIndexedLoadAction(im, VT, Legal);
324  setIndexedStoreAction(im, VT, Legal);
325  }
326 
327  if (HasMVEFP) {
328  setOperationAction(ISD::FMINNUM, VT, Legal);
329  setOperationAction(ISD::FMAXNUM, VT, Legal);
330  setOperationAction(ISD::FROUND, VT, Legal);
331 
332  // No native support for these.
333  setOperationAction(ISD::FDIV, VT, Expand);
334  setOperationAction(ISD::FREM, VT, Expand);
335  setOperationAction(ISD::FSQRT, VT, Expand);
336  setOperationAction(ISD::FSIN, VT, Expand);
337  setOperationAction(ISD::FCOS, VT, Expand);
338  setOperationAction(ISD::FPOW, VT, Expand);
339  setOperationAction(ISD::FLOG, VT, Expand);
340  setOperationAction(ISD::FLOG2, VT, Expand);
341  setOperationAction(ISD::FLOG10, VT, Expand);
342  setOperationAction(ISD::FEXP, VT, Expand);
343  setOperationAction(ISD::FEXP2, VT, Expand);
344  setOperationAction(ISD::FNEARBYINT, VT, Expand);
345  }
346  }
347 
348  // We 'support' these types up to bitcast/load/store level, regardless of
349  // MVE integer-only / float support. Only FP data processing on the FP
350  // vector types is inhibited at the integer-only level.
351  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
352  for (auto VT : LongTypes) {
353  addRegisterClass(VT, &ARM::MQPRRegClass);
354  setAllExpand(VT);
355  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
356  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
357  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
358  }
359  // We can do bitwise operations on v2i64 vectors
360  setOperationAction(ISD::AND, MVT::v2i64, Legal);
361  setOperationAction(ISD::OR, MVT::v2i64, Legal);
362  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
363 
364  // It is legal to extload from v4i8 to v4i16 or v4i32.
365  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
366  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
367  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
368 
369  // Some truncating stores are legal too.
370  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
371  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
372  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
373 
374  // Pre and Post inc on these are legal, given the correct extends
375  for (unsigned im = (unsigned)ISD::PRE_INC;
376  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
377  setIndexedLoadAction(im, MVT::v8i8, Legal);
378  setIndexedStoreAction(im, MVT::v8i8, Legal);
379  setIndexedLoadAction(im, MVT::v4i8, Legal);
380  setIndexedStoreAction(im, MVT::v4i8, Legal);
381  setIndexedLoadAction(im, MVT::v4i16, Legal);
382  setIndexedStoreAction(im, MVT::v4i16, Legal);
383  }
384 
385  // Predicate types
386  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
387  for (auto VT : pTypes) {
388  addRegisterClass(VT, &ARM::VCCRRegClass);
389  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
390  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
391  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
392  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
393  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
394  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
395  setOperationAction(ISD::SETCC, VT, Custom);
396  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
397  setOperationAction(ISD::LOAD, VT, Custom);
398  setOperationAction(ISD::STORE, VT, Custom);
399  }
400 }
401 
402 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
403  const ARMSubtarget &STI)
404  : TargetLowering(TM), Subtarget(&STI) {
405  RegInfo = Subtarget->getRegisterInfo();
406  Itins = Subtarget->getInstrItineraryData();
407 
410 
411  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
412  !Subtarget->isTargetWatchOS()) {
413  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
414  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
415  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
416  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
417  : CallingConv::ARM_AAPCS);
418  }
419 
420  if (Subtarget->isTargetMachO()) {
421  // Uses VFP for Thumb libfuncs if available.
422  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
423  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
424  static const struct {
425  const RTLIB::Libcall Op;
426  const char * const Name;
427  const ISD::CondCode Cond;
428  } LibraryCalls[] = {
429  // Single-precision floating-point arithmetic.
430  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
431  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
432  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
433  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
434 
435  // Double-precision floating-point arithmetic.
436  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
437  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
438  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
439  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
440 
441  // Single-precision comparisons.
442  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
443  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
444  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
445  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
446  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
447  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
448  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
449  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
450 
451  // Double-precision comparisons.
452  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
453  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
454  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
455  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
456  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
457  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
458  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
459  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
460 
461  // Floating-point to integer conversions.
462  // i64 conversions are done via library routines even when generating VFP
463  // instructions, so use the same ones.
464  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
465  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
466  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
467  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
468 
469  // Conversions between floating types.
470  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
471  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
472 
473  // Integer to floating-point conversions.
474  // i64 conversions are done via library routines even when generating VFP
475  // instructions, so use the same ones.
476  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
477  // e.g., __floatunsidf vs. __floatunssidfvfp.
478  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
479  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
480  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
481  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
482  };
483 
484  for (const auto &LC : LibraryCalls) {
485  setLibcallName(LC.Op, LC.Name);
486  if (LC.Cond != ISD::SETCC_INVALID)
487  setCmpLibcallCC(LC.Op, LC.Cond);
488  }
489  }
490  }
491 
492  // These libcalls are not available in 32-bit.
493  setLibcallName(RTLIB::SHL_I128, nullptr);
494  setLibcallName(RTLIB::SRL_I128, nullptr);
495  setLibcallName(RTLIB::SRA_I128, nullptr);
496 
497  // RTLIB
498  if (Subtarget->isAAPCS_ABI() &&
499  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
500  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
501  static const struct {
502  const RTLIB::Libcall Op;
503  const char * const Name;
504  const CallingConv::ID CC;
505  const ISD::CondCode Cond;
506  } LibraryCalls[] = {
507  // Double-precision floating-point arithmetic helper functions
508  // RTABI chapter 4.1.2, Table 2
509  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
510  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
511  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
512  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
513 
514  // Double-precision floating-point comparison helper functions
515  // RTABI chapter 4.1.2, Table 3
516  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
517  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
518  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
519  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
520  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
521  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
522  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
523  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
524 
525  // Single-precision floating-point arithmetic helper functions
526  // RTABI chapter 4.1.2, Table 4
527  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
528  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
529  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
530  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
531 
532  // Single-precision floating-point comparison helper functions
533  // RTABI chapter 4.1.2, Table 5
534  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
535  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
536  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
537  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
538  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
539  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
540  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
541  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
542 
543  // Floating-point to integer conversions.
544  // RTABI chapter 4.1.2, Table 6
545  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
546  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
547  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
548  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
549  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
550  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
551  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
552  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
553 
554  // Conversions between floating types.
555  // RTABI chapter 4.1.2, Table 7
556  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
557  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
558  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
559 
560  // Integer to floating-point conversions.
561  // RTABI chapter 4.1.2, Table 8
562  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
563  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
564  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
565  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
566  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
567  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
568  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
569  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
570 
571  // Long long helper functions
572  // RTABI chapter 4.2, Table 9
573  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
574  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
575  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
576  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
577 
578  // Integer division functions
579  // RTABI chapter 4.3.1
580  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
581  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
582  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
583  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
584  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
585  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
586  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
587  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
588  };
589 
590  for (const auto &LC : LibraryCalls) {
591  setLibcallName(LC.Op, LC.Name);
592  setLibcallCallingConv(LC.Op, LC.CC);
593  if (LC.Cond != ISD::SETCC_INVALID)
594  setCmpLibcallCC(LC.Op, LC.Cond);
595  }
596 
597  // EABI dependent RTLIB
598  if (TM.Options.EABIVersion == EABI::EABI4 ||
599  TM.Options.EABIVersion == EABI::EABI5) {
600  static const struct {
601  const RTLIB::Libcall Op;
602  const char *const Name;
603  const CallingConv::ID CC;
604  const ISD::CondCode Cond;
605  } MemOpsLibraryCalls[] = {
606  // Memory operations
607  // RTABI chapter 4.3.4
608  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
609  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611  };
612 
613  for (const auto &LC : MemOpsLibraryCalls) {
614  setLibcallName(LC.Op, LC.Name);
615  setLibcallCallingConv(LC.Op, LC.CC);
616  if (LC.Cond != ISD::SETCC_INVALID)
617  setCmpLibcallCC(LC.Op, LC.Cond);
618  }
619  }
620  }
621 
622  if (Subtarget->isTargetWindows()) {
623  static const struct {
624  const RTLIB::Libcall Op;
625  const char * const Name;
626  const CallingConv::ID CC;
627  } LibraryCalls[] = {
628  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
629  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
630  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
631  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
632  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
633  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
634  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
635  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
636  };
637 
638  for (const auto &LC : LibraryCalls) {
639  setLibcallName(LC.Op, LC.Name);
640  setLibcallCallingConv(LC.Op, LC.CC);
641  }
642  }
643 
644  // Use divmod compiler-rt calls for iOS 5.0 and later.
645  if (Subtarget->isTargetMachO() &&
646  !(Subtarget->isTargetIOS() &&
647  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
648  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
649  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
650  }
651 
652  // The half <-> float conversion functions are always soft-float on
653  // non-watchOS platforms, but are needed for some targets which use a
654  // hard-float calling convention by default.
655  if (!Subtarget->isTargetWatchABI()) {
656  if (Subtarget->isAAPCS_ABI()) {
657  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
658  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
659  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
660  } else {
661  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
662  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
663  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
664  }
665  }
666 
667  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
668  // a __gnu_ prefix (which is the default).
669  if (Subtarget->isTargetAEABI()) {
670  static const struct {
671  const RTLIB::Libcall Op;
672  const char * const Name;
673  const CallingConv::ID CC;
674  } LibraryCalls[] = {
675  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
676  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
677  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
678  };
679 
680  for (const auto &LC : LibraryCalls) {
681  setLibcallName(LC.Op, LC.Name);
682  setLibcallCallingConv(LC.Op, LC.CC);
683  }
684  }
685 
686  if (Subtarget->isThumb1Only())
687  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
688  else
689  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
690 
691  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
692  Subtarget->hasFPRegs()) {
693  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
694  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
695  if (!Subtarget->hasVFP2Base())
696  setAllExpand(MVT::f32);
697  if (!Subtarget->hasFP64())
698  setAllExpand(MVT::f64);
699  }
700 
701  if (Subtarget->hasFullFP16()) {
702  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
706 
709  }
710 
711  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
712  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
713  setTruncStoreAction(VT, InnerVT, Expand);
714  addAllExtLoads(VT, InnerVT, Expand);
715  }
716 
721 
723  }
724 
727 
730 
731  if (Subtarget->hasMVEIntegerOps())
732  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
733 
734  // Combine low-overhead loop intrinsics so that we can lower i1 types.
735  if (Subtarget->hasLOB()) {
738  }
739 
740  if (Subtarget->hasNEON()) {
741  addDRTypeForNEON(MVT::v2f32);
742  addDRTypeForNEON(MVT::v8i8);
743  addDRTypeForNEON(MVT::v4i16);
744  addDRTypeForNEON(MVT::v2i32);
745  addDRTypeForNEON(MVT::v1i64);
746 
747  addQRTypeForNEON(MVT::v4f32);
748  addQRTypeForNEON(MVT::v2f64);
749  addQRTypeForNEON(MVT::v16i8);
750  addQRTypeForNEON(MVT::v8i16);
751  addQRTypeForNEON(MVT::v4i32);
752  addQRTypeForNEON(MVT::v2i64);
753 
754  if (Subtarget->hasFullFP16()) {
755  addQRTypeForNEON(MVT::v8f16);
756  addDRTypeForNEON(MVT::v4f16);
757  }
758  }
759 
760  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
761  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
762  // none of Neon, MVE or VFP supports any arithmetic operations on it.
766  // FIXME: Code duplication: FDIV and FREM are expanded always, see
767  // ARMTargetLowering::addTypeForNEON method for details.
770  // FIXME: Create unittest.
771  // In other words, find a case where "copysign" appears in the DAG with
772  // vector operands.
774  // FIXME: Code duplication: SETCC has custom operation action, see
775  // ARMTargetLowering::addTypeForNEON method for details.
777  // FIXME: Create unittest for FNEG and for FABS.
789  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
796  }
797 
798  if (Subtarget->hasNEON()) {
799  // The same applies to v4f32. But keep in mind that vadd, vsub and vmul are
800  // natively supported for v4f32.
815 
816  // Mark v2f32 intrinsics.
831 
832  // Neon does not support some operations on v1i64 and v2i64 types.
834  // Custom handling for some quad-vector types to detect VMULL.
838  // Custom handling for some vector types to avoid expensive expansions
843  // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
844  // a destination type that is wider than the source, nor does it have a
845  // FP_TO_[SU]INT instruction with a destination that is narrower than the
846  // source.
855 
858 
859  // NEON does not have a single-instruction CTPOP for vectors with element
860  // types wider than 8 bits. However, custom lowering can leverage the
861  // v8i8/v16i8 vcnt instruction.
868 
871 
872  // NEON does not have a single-instruction CTTZ for vectors.
877 
882 
887 
892 
893  // NEON only has FMA instructions as of VFP4.
894  if (!Subtarget->hasVFP4Base()) {
897  }
898 
909 
910  // It is legal to extload from v4i8 to v4i16 or v4i32.
912  MVT::v2i32}) {
917  }
918  }
919  }
920 
921  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
929  }
930 
931  if (!Subtarget->hasFP64()) {
932  // When targeting a floating-point unit with only single-precision
933  // operations, f64 is legal for the few double-precision instructions which
934  // are present. However, no double-precision operations other than moves,
935  // loads and stores are provided by the hardware.
967  }
968 
969  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
971  if (Subtarget->hasFullFP16())
973  }
974 
975  if (!Subtarget->hasFP16())
977 
978  if (!Subtarget->hasFP64())
980 
982 
983  // ARM does not have floating-point extending loads.
984  for (MVT VT : MVT::fp_valuetypes()) {
987  }
988 
989  // ... or truncating stores
993 
994  // ARM does not have an i1 sign-extending load.
995  for (MVT VT : MVT::integer_valuetypes())
997 
998  // ARM supports all 4 flavors of integer indexed load / store.
999  if (!Subtarget->isThumb1Only()) {
1000  for (unsigned im = (unsigned)ISD::PRE_INC;
1010  }
1011  } else {
1012  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1015  }
1016 
1021 
1024 
1025  // i64 operation support.
1028  if (Subtarget->isThumb1Only()) {
1031  }
1032  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1033  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1035 
1043 
1044  // MVE lowers 64-bit shifts to lsll and lsrl,
1045  // assuming that ISD::SRL and SRA of i64 are already marked custom.
1046  if (Subtarget->hasMVEIntegerOps())
1048 
1049  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1050  if (Subtarget->isThumb1Only()) {
1054  }
1055 
1056  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1058 
1059  // ARM does not have ROTL.
1061  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1064  }
1067  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1070  }
1071 
1072  // @llvm.readcyclecounter requires the Performance Monitors extension.
1073  // Default to the 0 expansion on unsupported platforms.
1074  // FIXME: Technically there are older ARM CPUs that have
1075  // implementation-specific ways of obtaining this information.
1076  if (Subtarget->hasPerfMon())
1078 
1079  // Only ARMv6 has BSWAP.
1080  if (!Subtarget->hasV6Ops())
1082 
1083  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1084  : Subtarget->hasDivideInARMMode();
1085  if (!hasDivide) {
1086  // These are expanded into libcalls if the CPU doesn't have a hardware divider.
1089  }
1090 
1091  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1094 
1097  }
1098 
1101 
1102  // Register based DivRem for AEABI (RTABI 4.2)
1103  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1104  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1105  Subtarget->isTargetWindows()) {
1108  HasStandaloneRem = false;
1109 
1110  if (Subtarget->isTargetWindows()) {
1111  const struct {
1112  const RTLIB::Libcall Op;
1113  const char * const Name;
1114  const CallingConv::ID CC;
1115  } LibraryCalls[] = {
1116  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1117  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1118  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1119  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1120 
1121  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1122  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1123  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1124  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1125  };
1126 
1127  for (const auto &LC : LibraryCalls) {
1128  setLibcallName(LC.Op, LC.Name);
1129  setLibcallCallingConv(LC.Op, LC.CC);
1130  }
1131  } else {
1132  const struct {
1133  const RTLIB::Libcall Op;
1134  const char * const Name;
1135  const CallingConv::ID CC;
1136  } LibraryCalls[] = {
1137  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1138  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1139  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1140  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1141 
1142  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1143  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1144  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1145  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1146  };
1147 
1148  for (const auto &LC : LibraryCalls) {
1149  setLibcallName(LC.Op, LC.Name);
1150  setLibcallCallingConv(LC.Op, LC.CC);
1151  }
1152  }
1153 
1158  } else {
1161  }
1162 
1163  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
1164  for (auto &VT : {MVT::f32, MVT::f64})
1166 
1171 
1174 
1175  // Use the default implementation.
1182 
1183  if (Subtarget->isTargetWindows())
1185  else
1187 
1188  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1189  // the default expansion.
1190  InsertFencesForAtomic = false;
1191  if (Subtarget->hasAnyDataBarrier() &&
1192  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1193  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1194  // to ldrex/strex loops already.
1196  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1198 
1199  // On v8, we have particularly efficient implementations of atomic fences
1200  // if they can be combined with nearby atomic loads and stores.
1201  if (!Subtarget->hasAcquireRelease() ||
1202  getTargetMachine().getOptLevel() == 0) {
1203  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1204  InsertFencesForAtomic = true;
1205  }
1206  } else {
1207  // If there's anything we can use as a barrier, go through custom lowering
1208  // for ATOMIC_FENCE.
1209  // If target has DMB in thumb, Fences can be inserted.
1210  if (Subtarget->hasDataBarrier())
1211  InsertFencesForAtomic = true;
1212 
1214  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1215 
1216  // Set them all for expansion, which will force libcalls.
1229  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1230  // Unordered/Monotonic case.
1231  if (!InsertFencesForAtomic) {
1234  }
1235  }
1236 
1238 
1239  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1240  if (!Subtarget->hasV6Ops()) {
1243  }
1245 
1246  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1247  !Subtarget->isThumb1Only()) {
1248  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1249  // iff target supports vfp2.
1252  }
1253 
1254  // We want to custom lower some of our intrinsics.
1259  if (Subtarget->useSjLjEH())
1260  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1261 
1271  if (Subtarget->hasFullFP16()) {
1275  }
1276 
1278 
1281  if (Subtarget->hasFullFP16())
1286 
1287  // We don't support sin/cos/fmod/copysign/pow
1296  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1297  !Subtarget->isThumb1Only()) {
1300  }
1303 
1304  if (!Subtarget->hasVFP4Base()) {
1307  }
1308 
1309  // Various VFP goodness
1310  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1311  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1312  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1315  }
1316 
1317  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1318  if (!Subtarget->hasFP16()) {
1321  }
1322  }
1323 
1324  // Use __sincos_stret if available.
1325  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1326  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1329  }
1330 
1331  // FP-ARMv8 implements a lot of rounding-like FP operations.
1332  if (Subtarget->hasFPARMv8Base()) {
1341  if (Subtarget->hasNEON()) {
1346  }
1347 
1348  if (Subtarget->hasFP64()) {
1357  }
1358  }
1359 
1360  // FP16 operations often need to be promoted to call library functions.
1361  if (Subtarget->hasFullFP16()) {
1374 
1376  }
1377 
1378  if (Subtarget->hasNEON()) {
1379  // vmin and vmax aren't available in a scalar form, so we use
1380  // a NEON instruction with an undef lane instead.
1389 
1390  if (Subtarget->hasFullFP16()) {
1395 
1400  }
1401  }
1402 
1403  // We have target-specific dag combine patterns for the following nodes:
1404  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1411 
1412  if (Subtarget->hasV6Ops())
1414  if (Subtarget->isThumb1Only())
1416 
1418 
1419  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1420  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1422  else
1424 
1425  //// temporary - rewrite interface to use type
1426  MaxStoresPerMemset = 8;
1428  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1430  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1432 
1433  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1434  // are at least 4 bytes aligned.
1436 
1437  // Prefer likely predicted branches to selects on out-of-order cores.
1438  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1439 
1440  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1441 
1442  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1443 
1444  if (Subtarget->isThumb() || Subtarget->isThumb2())
1446 }
1447 
1449  return Subtarget->useSoftFloat();
1450 }
1451 
1452 // FIXME: It might make sense to define the representative register class as the
1453 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1454  // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1455 // SPR's representative would be DPR_VFP2. This should work well if register
1456 // pressure tracking were modified such that a register use would increment the
1457  // pressure of the register class's representative and all of its super
1458 // classes' representatives transitively. We have not implemented this because
1459 // of the difficulty prior to coalescing of modeling operand register classes
1460 // due to the common occurrence of cross class copies and subregister insertions
1461 // and extractions.
1462 std::pair<const TargetRegisterClass *, uint8_t>
1464  MVT VT) const {
1465  const TargetRegisterClass *RRC = nullptr;
1466  uint8_t Cost = 1;
1467  switch (VT.SimpleTy) {
1468  default:
1470  // Use DPR as the representative register class for all floating-point
1471  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1472  // the cost is 1 for both f32 and f64.
1473  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1474  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1475  RRC = &ARM::DPRRegClass;
1476  // When NEON is used for SP, only half of the register file is available
1477  // because operations that define both SP and DP results will be constrained
1478  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1479  // coalescing by double-counting the SP regs. See the FIXME above.
1480  if (Subtarget->useNEONForSinglePrecisionFP())
1481  Cost = 2;
1482  break;
1483  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1484  case MVT::v4f32: case MVT::v2f64:
1485  RRC = &ARM::DPRRegClass;
1486  Cost = 2;
1487  break;
1488  case MVT::v4i64:
1489  RRC = &ARM::DPRRegClass;
1490  Cost = 4;
1491  break;
1492  case MVT::v8i64:
1493  RRC = &ARM::DPRRegClass;
1494  Cost = 8;
1495  break;
1496  }
1497  return std::make_pair(RRC, Cost);
1498 }
1499 
1500 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1501  switch ((ARMISD::NodeType)Opcode) {
1502  case ARMISD::FIRST_NUMBER: break;
1503  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1504  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1505  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1506  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1507  case ARMISD::CALL: return "ARMISD::CALL";
1508  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1509  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1510  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1511  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1512  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1513  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1514  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1515  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1516  case ARMISD::CMP: return "ARMISD::CMP";
1517  case ARMISD::CMN: return "ARMISD::CMN";
1518  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1519  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1520  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1521  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1522  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1523 
1524  case ARMISD::CMOV: return "ARMISD::CMOV";
1525  case ARMISD::SUBS: return "ARMISD::SUBS";
1526 
1527  case ARMISD::SSAT: return "ARMISD::SSAT";
1528  case ARMISD::USAT: return "ARMISD::USAT";
1529 
1530  case ARMISD::ASRL: return "ARMISD::ASRL";
1531  case ARMISD::LSRL: return "ARMISD::LSRL";
1532  case ARMISD::LSLL: return "ARMISD::LSLL";
1533 
1534  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1535  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1536  case ARMISD::RRX: return "ARMISD::RRX";
1537 
1538  case ARMISD::ADDC: return "ARMISD::ADDC";
1539  case ARMISD::ADDE: return "ARMISD::ADDE";
1540  case ARMISD::SUBC: return "ARMISD::SUBC";
1541  case ARMISD::SUBE: return "ARMISD::SUBE";
1542  case ARMISD::LSLS: return "ARMISD::LSLS";
1543 
1544  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1545  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1546  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1547  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1548  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1549 
1550  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1551  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1552  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1553 
1554  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1555 
1556  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1557 
1558  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1559 
1560  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1561 
1562  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1563 
1564  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1565  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1566 
1567  case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
1568  case ARMISD::VCMP: return "ARMISD::VCMP";
1569  case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
1570  case ARMISD::VTST: return "ARMISD::VTST";
1571 
1572  case ARMISD::VSHLs: return "ARMISD::VSHLs";
1573  case ARMISD::VSHLu: return "ARMISD::VSHLu";
1574  case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1575  case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1576  case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1577  case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1578  case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1579  case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1580  case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1581  case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1582  case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1583  case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1584  case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1585  case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1586  case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1587  case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1588  case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1589  case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1590  case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1591  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1592  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1593  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1594  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1595  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1596  case ARMISD::VDUP: return "ARMISD::VDUP";
1597  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1598  case ARMISD::VEXT: return "ARMISD::VEXT";
1599  case ARMISD::VREV64: return "ARMISD::VREV64";
1600  case ARMISD::VREV32: return "ARMISD::VREV32";
1601  case ARMISD::VREV16: return "ARMISD::VREV16";
1602  case ARMISD::VZIP: return "ARMISD::VZIP";
1603  case ARMISD::VUZP: return "ARMISD::VUZP";
1604  case ARMISD::VTRN: return "ARMISD::VTRN";
1605  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1606  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1607  case ARMISD::VMOVN: return "ARMISD::VMOVN";
1608  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1609  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1610  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1611  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1612  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1613  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1614  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1615  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1616  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1617  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1618  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1619  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1620  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1621  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1622  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1623  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1624  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1625  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1626  case ARMISD::BFI: return "ARMISD::BFI";
1627  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1628  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1629  case ARMISD::VBSL: return "ARMISD::VBSL";
1630  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1631  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1632  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1633  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1634  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1635  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1636  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1637  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1638  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1639  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1640  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1641  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1642  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1643  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1644  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1645  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1646  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1647  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1648  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1649  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1650  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1651  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1652  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1653  case ARMISD::WLS: return "ARMISD::WLS";
1654  case ARMISD::LE: return "ARMISD::LE";
1655  case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
1656  case ARMISD::CSINV: return "ARMISD::CSINV";
1657  case ARMISD::CSNEG: return "ARMISD::CSNEG";
1658  case ARMISD::CSINC: return "ARMISD::CSINC";
1659  }
1660  return nullptr;
1661 }
1662 
1664  EVT VT) const {
1665  if (!VT.isVector())
1666  return getPointerTy(DL);
1667 
1668  // MVE has a predicate register.
1669  if (Subtarget->hasMVEIntegerOps() &&
1670  (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
1673 }
1674 
1675 /// getRegClassFor - Return the register class that should be used for the
1676 /// specified value type.
1677 const TargetRegisterClass *
1678 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1679  (void)isDivergent;
1680  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1681  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1682  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1683  // MVE Q registers.
1684  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1685  if (VT == MVT::v4i64)
1686  return &ARM::QQPRRegClass;
1687  if (VT == MVT::v8i64)
1688  return &ARM::QQQQPRRegClass;
1689  }
1690  return TargetLowering::getRegClassFor(VT);
1691 }
1692 
1693 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1694 // source/dest is aligned and the copy size is large enough. We therefore want
1695 // to align such objects passed to memory intrinsics.
1697  unsigned &PrefAlign) const {
1698  if (!isa<MemIntrinsic>(CI))
1699  return false;
1700  MinSize = 8;
1701  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1702  // cycle faster than 4-byte aligned LDM.
1703  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1704  return true;
1705 }
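// Sketch of the intended effect (assumed IR, not taken from this file): given
//   %buf = alloca [64 x i8], align 4
//   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %buf, i8* %src, i32 64, i1 false)
// the hook above reports MinSize = 8 and PrefAlign = 8 on v6+ non-M-class
// cores, which lets the caller raise %buf's alignment so the expanded copy can
// use 8-byte aligned LDM/STM sequences.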
1706 
1707 // Create a fast isel object.
1708 FastISel *
1710  const TargetLibraryInfo *libInfo) const {
1711  return ARM::createFastISel(funcInfo, libInfo);
1712 }
1713 
1715  unsigned NumVals = N->getNumValues();
1716  if (!NumVals)
1717  return Sched::RegPressure;
1718 
1719  for (unsigned i = 0; i != NumVals; ++i) {
1720  EVT VT = N->getValueType(i);
1721  if (VT == MVT::Glue || VT == MVT::Other)
1722  continue;
1723  if (VT.isFloatingPoint() || VT.isVector())
1724  return Sched::ILP;
1725  }
1726 
1727  if (!N->isMachineOpcode())
1728  return Sched::RegPressure;
1729 
1730  // Loads are scheduled for latency even if the instruction itinerary
1731  // is not available.
1732  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1733  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1734 
1735  if (MCID.getNumDefs() == 0)
1736  return Sched::RegPressure;
1737  if (!Itins->isEmpty() &&
1738  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1739  return Sched::ILP;
1740 
1741  return Sched::RegPressure;
1742 }
1743 
1744 //===----------------------------------------------------------------------===//
1745 // Lowering Code
1746 //===----------------------------------------------------------------------===//
1747 
1748 static bool isSRL16(const SDValue &Op) {
1749  if (Op.getOpcode() != ISD::SRL)
1750  return false;
1751  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1752  return Const->getZExtValue() == 16;
1753  return false;
1754 }
1755 
1756 static bool isSRA16(const SDValue &Op) {
1757  if (Op.getOpcode() != ISD::SRA)
1758  return false;
1759  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1760  return Const->getZExtValue() == 16;
1761  return false;
1762 }
1763 
1764 static bool isSHL16(const SDValue &Op) {
1765  if (Op.getOpcode() != ISD::SHL)
1766  return false;
1767  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1768  return Const->getZExtValue() == 16;
1769  return false;
1770 }
1771 
1772 // Check for a signed 16-bit value. We special-case SRA because it makes
1773 // things simpler when also looking for SRAs that aren't sign-extending a
1774 // smaller value. Without the check, we'd need to take extra care with
1775 // checking order for some operations.
1776 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1777  if (isSRA16(Op))
1778  return isSHL16(Op.getOperand(0));
1779  return DAG.ComputeNumSignBits(Op) == 17;
1780 }
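// Worked example (a sketch): for a 32-bit %x, (sra (shl %x, 16), 16)
// sign-extends the low half, so isS16 matches it through the isSRA16/isSHL16
// pair without consulting ComputeNumSignBits. A plain constant such as
// 0x00007FFF also qualifies: bits 31..15 are all copies of the sign bit, so
// ComputeNumSignBits returns exactly 17, meaning the value survives a truncate
// to i16 followed by a sign extend.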
1781 
1782 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1784  switch (CC) {
1785  default: llvm_unreachable("Unknown condition code!");
1786  case ISD::SETNE: return ARMCC::NE;
1787  case ISD::SETEQ: return ARMCC::EQ;
1788  case ISD::SETGT: return ARMCC::GT;
1789  case ISD::SETGE: return ARMCC::GE;
1790  case ISD::SETLT: return ARMCC::LT;
1791  case ISD::SETLE: return ARMCC::LE;
1792  case ISD::SETUGT: return ARMCC::HI;
1793  case ISD::SETUGE: return ARMCC::HS;
1794  case ISD::SETULT: return ARMCC::LO;
1795  case ISD::SETULE: return ARMCC::LS;
1796  }
1797 }
1798 
1799 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1801  ARMCC::CondCodes &CondCode2) {
1802  CondCode2 = ARMCC::AL;
1803  switch (CC) {
1804  default: llvm_unreachable("Unknown FP condition!");
1805  case ISD::SETEQ:
1806  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1807  case ISD::SETGT:
1808  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1809  case ISD::SETGE:
1810  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1811  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1812  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1813  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1814  case ISD::SETO: CondCode = ARMCC::VC; break;
1815  case ISD::SETUO: CondCode = ARMCC::VS; break;
1816  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1817  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1818  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1819  case ISD::SETLT:
1820  case ISD::SETULT: CondCode = ARMCC::LT; break;
1821  case ISD::SETLE:
1822  case ISD::SETULE: CondCode = ARMCC::LE; break;
1823  case ISD::SETNE:
1824  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1825  }
1826 }
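// Illustrative example (not from this file): condition codes that have no
// single ARM encoding set CondCode2, and the caller emits two predicated
// operations. A SETONE (ordered and not equal) f32 comparison, for instance,
// lowers roughly to:
//   vcmp.f32 s0, s1
//   vmrs     APSR_nzcv, fpscr
//   <op>mi   ...        ; CondCode  = MI covers a < b
//   <op>gt   ...        ; CondCode2 = GT covers a > b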
1827 
1828 //===----------------------------------------------------------------------===//
1829 // Calling Convention Implementation
1830 //===----------------------------------------------------------------------===//
1831 
1832 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1833 /// account presence of floating point hardware and calling convention
1834 /// limitations, such as support for variadic functions.
1835 CallingConv::ID
1836 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1837  bool isVarArg) const {
1838  switch (CC) {
1839  default:
1840  report_fatal_error("Unsupported calling convention");
1841  case CallingConv::ARM_AAPCS:
1842  case CallingConv::ARM_APCS:
1843  case CallingConv::GHC:
1844  return CC;
1845  case CallingConv::PreserveMost:
1846  return CallingConv::PreserveMost;
1847  case CallingConv::ARM_AAPCS_VFP:
1848  case CallingConv::Swift:
1849  return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1850  case CallingConv::C:
1851  if (!Subtarget->isAAPCS_ABI())
1852  return CallingConv::ARM_APCS;
1853  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
1854  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1855  !isVarArg)
1856  return CallingConv::ARM_AAPCS_VFP;
1857  else
1858  return CallingConv::ARM_AAPCS;
1859  case CallingConv::Fast:
1860  case CallingConv::CXX_FAST_TLS:
1861  if (!Subtarget->isAAPCS_ABI()) {
1862  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
1863  return CallingConv::Fast;
1864  return CallingConv::ARM_APCS;
1865  } else if (Subtarget->hasVFP2Base() &&
1866  !Subtarget->isThumb1Only() && !isVarArg)
1867  return CallingConv::ARM_AAPCS_VFP;
1868  else
1869  return CallingConv::ARM_AAPCS;
1870  }
1871 }
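// Worked example (not from this file): on an AAPCS hard-float target with
// VFP2 and outside Thumb1, a non-variadic CallingConv::C call resolves to
// ARM_AAPCS_VFP, so FP arguments travel in s/d registers; the same call made
// variadic falls back to ARM_AAPCS and passes FP arguments in core registers,
// as the base AAPCS requires for varargs.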
1872 
1873 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1874  bool isVarArg) const {
1875  return CCAssignFnForNode(CC, false, isVarArg);
1876 }
1877 
1878 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1879  bool isVarArg) const {
1880  return CCAssignFnForNode(CC, true, isVarArg);
1881 }
1882 
1883 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1884 /// CallingConvention.
1885 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1886  bool Return,
1887  bool isVarArg) const {
1888  switch (getEffectiveCallingConv(CC, isVarArg)) {
1889  default:
1890  report_fatal_error("Unsupported calling convention");
1891  case CallingConv::ARM_APCS:
1892  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1893  case CallingConv::ARM_AAPCS:
1894  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1895  case CallingConv::ARM_AAPCS_VFP:
1896  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1897  case CallingConv::Fast:
1898  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1899  case CallingConv::GHC:
1900  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1901  case CallingConv::PreserveMost:
1902  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1903  }
1904 }
1905 
1906 /// LowerCallResult - Lower the result values of a call into the
1907 /// appropriate copies out of appropriate physical registers.
1908 SDValue ARMTargetLowering::LowerCallResult(
1909  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1910  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1911  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1912  SDValue ThisVal) const {
1913  // Assign locations to each value returned by this call.
1915  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1916  *DAG.getContext());
1917  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1918 
1919  // Copy all of the result registers out of their specified physreg.
1920  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1921  CCValAssign VA = RVLocs[i];
1922 
1923  // Pass 'this' value directly from the argument to return value, to avoid
1924  // reg unit interference
1925  if (i == 0 && isThisReturn) {
1926  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1927  "unexpected return calling convention register assignment");
1928  InVals.push_back(ThisVal);
1929  continue;
1930  }
1931 
1932  SDValue Val;
1933  if (VA.needsCustom()) {
1934  // Handle f64 or half of a v2f64.
1935  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1936  InFlag);
1937  Chain = Lo.getValue(1);
1938  InFlag = Lo.getValue(2);
1939  VA = RVLocs[++i]; // skip ahead to next loc
1940  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1941  InFlag);
1942  Chain = Hi.getValue(1);
1943  InFlag = Hi.getValue(2);
1944  if (!Subtarget->isLittle())
1945  std::swap (Lo, Hi);
1946  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1947 
1948  if (VA.getLocVT() == MVT::v2f64) {
1949  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1950  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1951  DAG.getConstant(0, dl, MVT::i32));
1952 
1953  VA = RVLocs[++i]; // skip ahead to next loc
1954  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1955  Chain = Lo.getValue(1);
1956  InFlag = Lo.getValue(2);
1957  VA = RVLocs[++i]; // skip ahead to next loc
1958  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1959  Chain = Hi.getValue(1);
1960  InFlag = Hi.getValue(2);
1961  if (!Subtarget->isLittle())
1962  std::swap (Lo, Hi);
1963  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1964  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1965  DAG.getConstant(1, dl, MVT::i32));
1966  }
1967  } else {
1968  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1969  InFlag);
1970  Chain = Val.getValue(1);
1971  InFlag = Val.getValue(2);
1972  }
1973 
1974  switch (VA.getLocInfo()) {
1975  default: llvm_unreachable("Unknown loc info!");
1976  case CCValAssign::Full: break;
1977  case CCValAssign::BCvt:
1978  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1979  break;
1980  }
1981 
1982  InVals.push_back(Val);
1983  }
1984 
1985  return Chain;
1986 }
1987 
1988 /// LowerMemOpCallTo - Store the argument to the stack.
1989 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1990  SDValue Arg, const SDLoc &dl,
1991  SelectionDAG &DAG,
1992  const CCValAssign &VA,
1993  ISD::ArgFlagsTy Flags) const {
1994  unsigned LocMemOffset = VA.getLocMemOffset();
1995  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1996  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1997  StackPtr, PtrOff);
1998  return DAG.getStore(
1999  Chain, dl, Arg, PtrOff,
2000  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
2001 }
2002 
2003 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2004  SDValue Chain, SDValue &Arg,
2005  RegsToPassVector &RegsToPass,
2006  CCValAssign &VA, CCValAssign &NextVA,
2007  SDValue &StackPtr,
2008  SmallVectorImpl<SDValue> &MemOpChains,
2009  ISD::ArgFlagsTy Flags) const {
2010  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2011  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2012  unsigned id = Subtarget->isLittle() ? 0 : 1;
2013  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2014 
2015  if (NextVA.isRegLoc())
2016  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2017  else {
2018  assert(NextVA.isMemLoc());
2019  if (!StackPtr.getNode())
2020  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2021  getPointerTy(DAG.getDataLayout()));
2022 
2023  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
2024  dl, DAG, NextVA,
2025  Flags));
2026  }
2027 }
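// Illustrative sketch (not part of this file): the bit-level split VMOVRRD
// performs when an f64 argument goes into two GPRs. splitF64 is a
// hypothetical helper (needs <cstdint>, <cstring>, <utility>).
static std::pair<uint32_t, uint32_t> splitF64(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  // Little-endian targets hand out the low word first (id == 0 above);
  // big-endian targets hand out the high word first.
  return {static_cast<uint32_t>(Bits), static_cast<uint32_t>(Bits >> 32)};
}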
2028 
2029 /// LowerCall - Lower a call into a callseq_start <-
2030 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2031 /// nodes.
2032 SDValue
2033 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2034  SmallVectorImpl<SDValue> &InVals) const {
2035  SelectionDAG &DAG = CLI.DAG;
2036  SDLoc &dl = CLI.DL;
2038  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2040  SDValue Chain = CLI.Chain;
2041  SDValue Callee = CLI.Callee;
2042  bool &isTailCall = CLI.IsTailCall;
2043  CallingConv::ID CallConv = CLI.CallConv;
2044  bool doesNotRet = CLI.DoesNotReturn;
2045  bool isVarArg = CLI.IsVarArg;
2046 
2047  MachineFunction &MF = DAG.getMachineFunction();
2049  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2050  bool isThisReturn = false;
2051  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
2052  bool PreferIndirect = false;
2053 
2054  // Disable tail calls if they're not supported.
2055  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
2056  isTailCall = false;
2057 
2058  if (isa<GlobalAddressSDNode>(Callee)) {
2059  // If we're optimizing for minimum size and the function is called three or
2060  // more times in this block, we can improve codesize by calling indirectly
2061  // as BLXr has a 16-bit encoding.
2062  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2063  if (CLI.CS) {
2064  auto *BB = CLI.CS.getParent();
2065  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2066  count_if(GV->users(), [&BB](const User *U) {
2067  return isa<Instruction>(U) &&
2068  cast<Instruction>(U)->getParent() == BB;
2069  }) > 2;
2070  }
2071  }
2072  if (isTailCall) {
2073  // Check if it's really possible to do a tail call.
2074  isTailCall = IsEligibleForTailCallOptimization(
2075  Callee, CallConv, isVarArg, isStructRet,
2076  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2077  PreferIndirect);
2078  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
2079  report_fatal_error("failed to perform tail call elimination on a call "
2080  "site marked musttail");
2081  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2082  // detected sibcalls.
2083  if (isTailCall)
2084  ++NumTailCalls;
2085  }
2086 
2087  // Analyze operands of the call, assigning locations to each operand.
2089  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2090  *DAG.getContext());
2091  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2092 
2093  // Get a count of how many bytes are to be pushed on the stack.
2094  unsigned NumBytes = CCInfo.getNextStackOffset();
2095 
2096  if (isTailCall) {
2097  // For tail calls, memory operands are available in our caller's stack.
2098  NumBytes = 0;
2099  } else {
2100  // Adjust the stack pointer for the new arguments...
2101  // These operations are automatically eliminated by the prolog/epilog pass
2102  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2103  }
2104 
2105  SDValue StackPtr =
2106  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2107 
2108  RegsToPassVector RegsToPass;
2109  SmallVector<SDValue, 8> MemOpChains;
2110 
2111  // Walk the register/memloc assignments, inserting copies/loads. In the case
2112  // of tail call optimization, arguments are handled later.
2113  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2114  i != e;
2115  ++i, ++realArgIdx) {
2116  CCValAssign &VA = ArgLocs[i];
2117  SDValue Arg = OutVals[realArgIdx];
2118  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2119  bool isByVal = Flags.isByVal();
2120 
2121  // Promote the value if needed.
2122  switch (VA.getLocInfo()) {
2123  default: llvm_unreachable("Unknown loc info!");
2124  case CCValAssign::Full: break;
2125  case CCValAssign::SExt:
2126  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2127  break;
2128  case CCValAssign::ZExt:
2129  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2130  break;
2131  case CCValAssign::AExt:
2132  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2133  break;
2134  case CCValAssign::BCvt:
2135  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2136  break;
2137  }
2138 
2139  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2140  if (VA.needsCustom()) {
2141  if (VA.getLocVT() == MVT::v2f64) {
2142  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2143  DAG.getConstant(0, dl, MVT::i32));
2144  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2145  DAG.getConstant(1, dl, MVT::i32));
2146 
2147  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
2148  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2149 
2150  VA = ArgLocs[++i]; // skip ahead to next loc
2151  if (VA.isRegLoc()) {
2152  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
2153  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2154  } else {
2155  assert(VA.isMemLoc());
2156 
2157  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
2158  dl, DAG, VA, Flags));
2159  }
2160  } else {
2161  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2162  StackPtr, MemOpChains, Flags);
2163  }
2164  } else if (VA.isRegLoc()) {
2165  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2166  Outs[0].VT == MVT::i32) {
2167  assert(VA.getLocVT() == MVT::i32 &&
2168  "unexpected calling convention register assignment");
2169  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2170  "unexpected use of 'returned'");
2171  isThisReturn = true;
2172  }
2173  const TargetOptions &Options = DAG.getTarget().Options;
2174  if (Options.EnableDebugEntryValues)
2175  CSInfo.emplace_back(VA.getLocReg(), i);
2176  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2177  } else if (isByVal) {
2178  assert(VA.isMemLoc());
2179  unsigned offset = 0;
2180 
2181  // True if this byval aggregate will be split between registers
2182  // and memory.
2183  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2184  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2185 
2186  if (CurByValIdx < ByValArgsCount) {
2187 
2188  unsigned RegBegin, RegEnd;
2189  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2190 
2191  EVT PtrVT =
2193  unsigned int i, j;
2194  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2195  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2196  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2197  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
2199  DAG.InferPtrAlignment(AddArg));
2200  MemOpChains.push_back(Load.getValue(1));
2201  RegsToPass.push_back(std::make_pair(j, Load));
2202  }
2203 
2204  // If the parameter size exceeds the register area, the "offset" value
2205  // helps us to calculate the stack slot for the remaining part properly.
2206  offset = RegEnd - RegBegin;
2207 
2208  CCInfo.nextInRegsParam();
2209  }
2210 
2211  if (Flags.getByValSize() > 4*offset) {
2212  auto PtrVT = getPointerTy(DAG.getDataLayout());
2213  unsigned LocMemOffset = VA.getLocMemOffset();
2214  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2215  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2216  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2217  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2218  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2219  MVT::i32);
2220  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
2221  MVT::i32);
2222 
2223  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2224  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2225  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2226  Ops));
2227  }
2228  } else if (!isTailCall) {
2229  assert(VA.isMemLoc());
2230 
2231  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2232  dl, DAG, VA, Flags));
2233  }
2234  }
2235 
2236  if (!MemOpChains.empty())
2237  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2238 
2239  // Build a sequence of copy-to-reg nodes chained together with token chain
2240  // and flag operands which copy the outgoing args into the appropriate regs.
2241  SDValue InFlag;
2242  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2243  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2244  RegsToPass[i].second, InFlag);
2245  InFlag = Chain.getValue(1);
2246  }
2247 
2248  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2249  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2250  // node so that legalize doesn't hack it.
2251  bool isDirect = false;
2252 
2253  const TargetMachine &TM = getTargetMachine();
2254  const Module *Mod = MF.getFunction().getParent();
2255  const GlobalValue *GV = nullptr;
2256  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2257  GV = G->getGlobal();
2258  bool isStub =
2259  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2260 
2261  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2262  bool isLocalARMFunc = false;
2264  auto PtrVt = getPointerTy(DAG.getDataLayout());
2265 
2266  if (Subtarget->genLongCalls()) {
2267  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2268  "long-calls codegen is not position independent!");
2269  // Handle a global address or an external symbol. If it's not one of
2270  // those, the target's already in a register, so we don't need to do
2271  // anything extra.
2272  if (isa<GlobalAddressSDNode>(Callee)) {
2273  // Create a constant pool entry for the callee address
2274  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2275  ARMConstantPoolValue *CPV =
2276  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2277 
2278  // Get the address of the callee into a register
2279  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2280  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2281  Callee = DAG.getLoad(
2282  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2284  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2285  const char *Sym = S->getSymbol();
2286 
2287  // Create a constant pool entry for the callee address
2288  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2289  ARMConstantPoolValue *CPV =
2291  ARMPCLabelIndex, 0);
2292  // Get the address of the callee into a register
2293  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2294  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2295  Callee = DAG.getLoad(
2296  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2298  }
2299  } else if (isa<GlobalAddressSDNode>(Callee)) {
2300  if (!PreferIndirect) {
2301  isDirect = true;
2302  bool isDef = GV->isStrongDefinitionForLinker();
2303 
2304  // ARM call to a local ARM function is predicable.
2305  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2306  // tBX takes a register source operand.
2307  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2308  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2309  Callee = DAG.getNode(
2310  ARMISD::WrapperPIC, dl, PtrVt,
2311  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2312  Callee = DAG.getLoad(
2313  PtrVt, dl, DAG.getEntryNode(), Callee,
2315  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2317  } else if (Subtarget->isTargetCOFF()) {
2318  assert(Subtarget->isTargetWindows() &&
2319  "Windows is the only supported COFF target");
2320  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2323  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2324  TargetFlags);
2325  if (GV->hasDLLImportStorageClass())
2326  Callee =
2327  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2328  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2330  } else {
2331  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2332  }
2333  }
2334  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2335  isDirect = true;
2336  // tBX takes a register source operand.
2337  const char *Sym = S->getSymbol();
2338  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2339  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2340  ARMConstantPoolValue *CPV =
2342  ARMPCLabelIndex, 4);
2343  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2344  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2345  Callee = DAG.getLoad(
2346  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2348  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2349  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2350  } else {
2351  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2352  }
2353  }
2354 
2355  // FIXME: handle tail calls differently.
2356  unsigned CallOpc;
2357  if (Subtarget->isThumb()) {
2358  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2359  CallOpc = ARMISD::CALL_NOLINK;
2360  else
2361  CallOpc = ARMISD::CALL;
2362  } else {
2363  if (!isDirect && !Subtarget->hasV5TOps())
2364  CallOpc = ARMISD::CALL_NOLINK;
2365  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2366  // Emit regular call when code size is the priority
2367  !Subtarget->hasMinSize())
2368  // "mov lr, pc; b _foo" to avoid confusing the RSP
2369  CallOpc = ARMISD::CALL_NOLINK;
2370  else
2371  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2372  }
2373 
2374  std::vector<SDValue> Ops;
2375  Ops.push_back(Chain);
2376  Ops.push_back(Callee);
2377 
2378  // Add argument registers to the end of the list so that they are known live
2379  // into the call.
2380  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2381  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2382  RegsToPass[i].second.getValueType()));
2383 
2384  // Add a register mask operand representing the call-preserved registers.
2385  if (!isTailCall) {
2386  const uint32_t *Mask;
2387  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2388  if (isThisReturn) {
2389  // For 'this' returns, use the R0-preserving mask if applicable
2390  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2391  if (!Mask) {
2392  // Set isThisReturn to false if the calling convention is not one that
2393  // allows 'returned' to be modeled in this way, so LowerCallResult does
2394  // not try to pass 'this' straight through
2395  isThisReturn = false;
2396  Mask = ARI->getCallPreservedMask(MF, CallConv);
2397  }
2398  } else
2399  Mask = ARI->getCallPreservedMask(MF, CallConv);
2400 
2401  assert(Mask && "Missing call preserved mask for calling convention");
2402  Ops.push_back(DAG.getRegisterMask(Mask));
2403  }
2404 
2405  if (InFlag.getNode())
2406  Ops.push_back(InFlag);
2407 
2408  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2409  if (isTailCall) {
2411  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2412  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2413  return Ret;
2414  }
2415 
2416  // Returns a chain and a flag for retval copy to use.
2417  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2418  InFlag = Chain.getValue(1);
2419  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2420 
2421  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2422  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2423  if (!Ins.empty())
2424  InFlag = Chain.getValue(1);
2425 
2426  // Handle result values, copying them out of physregs into vregs that we
2427  // return.
2428  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2429  InVals, isThisReturn,
2430  isThisReturn ? OutVals[0] : SDValue());
2431 }
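// Illustrative example (not from this file): for a simple direct call such as
// "int r = f(a, b);" the sequence built above is roughly
//   callseq_start
//     -> CopyToReg r0, a -> CopyToReg r1, b       (argument copies)
//     -> ARMISD::CALL @f, regmask, glue           (or CALL_PRED / CALL_NOLINK)
//   callseq_end
//     -> CopyFromReg r0                           (via LowerCallResult)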
2432 
2433 /// HandleByVal - Every parameter *after* a byval parameter is passed
2434 /// on the stack. Remember the next parameter register to allocate,
2435 /// and then confiscate the rest of the parameter registers to ensure
2436 /// this.
2437 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2438  unsigned Align) const {
2439  // Byval (as with any stack) slots are always at least 4 byte aligned.
2440  Align = std::max(Align, 4U);
2441 
2442  unsigned Reg = State->AllocateReg(GPRArgRegs);
2443  if (!Reg)
2444  return;
2445 
2446  unsigned AlignInRegs = Align / 4;
2447  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2448  for (unsigned i = 0; i < Waste; ++i)
2449  Reg = State->AllocateReg(GPRArgRegs);
2450 
2451  if (!Reg)
2452  return;
2453 
2454  unsigned Excess = 4 * (ARM::R4 - Reg);
2455 
2456  // Special case when NSAA != SP and the parameter size is greater than the
2457  // size of all remaining GPR regs. In that case we can't split the parameter;
2458  // we must send it to the stack. We also must set the NCRN to R4, so that
2459  // all remaining registers are wasted.
2460  const unsigned NSAAOffset = State->getNextStackOffset();
2461  if (NSAAOffset != 0 && Size > Excess) {
2462  while (State->AllocateReg(GPRArgRegs))
2463  ;
2464  return;
2465  }
2466 
2467  // The first register for the byval parameter is the first register that
2468  // wasn't allocated before this method call, so it would be "reg".
2469  // If the parameter is small enough to be saved in the range [reg, r4), then
2470  // the end (first-after-last) register would be reg + param-size-in-regs;
2471  // otherwise the parameter is split between registers and the stack,
2472  // and the end register is r4 in that case.
2473  unsigned ByValRegBegin = Reg;
2474  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2475  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2476  // Note that the first register was already allocated at the beginning of
2477  // this method, so allocate the remaining registers we need here.
2478  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2479  State->AllocateReg(GPRArgRegs);
2480  // A byval parameter that is split between registers and memory needs its
2481  // size truncated here.
2482  // In the case where the entire structure fits in registers, we set the
2483  // size in memory to zero.
2484  Size = std::max<int>(Size - Excess, 0);
2485 }
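// Illustrative sketch (not part of this file): the register/stack split the
// code above computes, as plain arithmetic and ignoring the alignment-waste
// and NSAA != SP special cases. splitByVal is a hypothetical helper (needs
// <algorithm>).
struct ByValSplit { unsigned RegBytes; unsigned StackBytes; };
static ByValSplit splitByVal(unsigned Size, unsigned FreeGPRs) {
  unsigned Excess = 4 * FreeGPRs;             // bytes that still fit in r0-r3
  unsigned InRegs = std::min(Size, Excess);   // e.g. Size = 20, FreeGPRs = 3
  return {InRegs, Size - InRegs};             //      -> 12 in regs, 8 on stack
}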
2486 
2487 /// MatchingStackOffset - Return true if the given stack call argument is
2488 /// already available in the same position (relatively) of the caller's
2489 /// incoming argument stack.
2490 static
2491 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2492  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2493  const TargetInstrInfo *TII) {
2494  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2495  int FI = std::numeric_limits<int>::max();
2496  if (Arg.getOpcode() == ISD::CopyFromReg) {
2497  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2498  if (!Register::isVirtualRegister(VR))
2499  return false;
2500  MachineInstr *Def = MRI->getVRegDef(VR);
2501  if (!Def)
2502  return false;
2503  if (!Flags.isByVal()) {
2504  if (!TII->isLoadFromStackSlot(*Def, FI))
2505  return false;
2506  } else {
2507  return false;
2508  }
2509  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2510  if (Flags.isByVal())
2511  // ByVal argument is passed in as a pointer but it's now being
2512  // dereferenced. e.g.
2513  // define @foo(%struct.X* %A) {
2514  // tail call @bar(%struct.X* byval %A)
2515  // }
2516  return false;
2517  SDValue Ptr = Ld->getBasePtr();
2518  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2519  if (!FINode)
2520  return false;
2521  FI = FINode->getIndex();
2522  } else
2523  return false;
2524 
2526  if (!MFI.isFixedObjectIndex(FI))
2527  return false;
2528  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2529 }
2530 
2531 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2532 /// for tail call optimization. Targets which want to do tail call
2533 /// optimization should implement this function.
2534 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2535  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2536  bool isCalleeStructRet, bool isCallerStructRet,
2537  const SmallVectorImpl<ISD::OutputArg> &Outs,
2538  const SmallVectorImpl<SDValue> &OutVals,
2540  const bool isIndirect) const {
2541  MachineFunction &MF = DAG.getMachineFunction();
2542  const Function &CallerF = MF.getFunction();
2543  CallingConv::ID CallerCC = CallerF.getCallingConv();
2544 
2545  assert(Subtarget->supportsTailCall());
2546 
2547  // Indirect tail calls cannot be optimized for Thumb1 if the args
2548  // to the call take up r0-r3. The reason is that there are no legal registers
2549  // left to hold the pointer to the function to be called.
2550  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2551  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2552  return false;
2553 
2554  // Look for obvious safe cases to perform tail call optimization that do not
2555  // require ABI changes. This is what gcc calls sibcall.
2556 
2557  // Exception-handling functions need a special set of instructions to indicate
2558  // a return to the hardware. Tail-calling another function would probably
2559  // break this.
2560  if (CallerF.hasFnAttribute("interrupt"))
2561  return false;
2562 
2563  // Also avoid sibcall optimization if either caller or callee uses struct
2564  // return semantics.
2565  if (isCalleeStructRet || isCallerStructRet)
2566  return false;
2567 
2568  // Externally-defined functions with weak linkage should not be
2569  // tail-called on ARM when the OS does not support dynamic
2570  // pre-emption of symbols, as the AAELF spec requires normal calls
2571  // to undefined weak functions to be replaced with a NOP or jump to the
2572  // next instruction. The behaviour of branch instructions in this
2573  // situation (as used for tail calls) is implementation-defined, so we
2574  // cannot rely on the linker replacing the tail call with a return.
2575  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2576  const GlobalValue *GV = G->getGlobal();
2578  if (GV->hasExternalWeakLinkage() &&
2579  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2580  return false;
2581  }
2582 
2583  // Check that the call results are passed in the same way.
2584  LLVMContext &C = *DAG.getContext();
2585  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2586  CCAssignFnForReturn(CalleeCC, isVarArg),
2587  CCAssignFnForReturn(CallerCC, isVarArg)))
2588  return false;
2589  // The callee has to preserve all registers the caller needs to preserve.
2590  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2591  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2592  if (CalleeCC != CallerCC) {
2593  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2594  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2595  return false;
2596  }
2597 
2598  // If Caller's vararg or byval argument has been split between registers and
2599  // stack, do not perform tail call, since part of the argument is in caller's
2600  // local frame.
2601  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2602  if (AFI_Caller->getArgRegsSaveSize())
2603  return false;
2604 
2605  // If the callee takes no arguments then go on to check the results of the
2606  // call.
2607  if (!Outs.empty()) {
2608  // Check if stack adjustment is needed. For now, do not do this if any
2609  // argument is passed on the stack.
2611  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2612  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2613  if (CCInfo.getNextStackOffset()) {
2614  // Check if the arguments are already laid out in the right way as
2615  // the caller's fixed stack objects.
2616  MachineFrameInfo &MFI = MF.getFrameInfo();
2617  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2618  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2619  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2620  i != e;
2621  ++i, ++realArgIdx) {
2622  CCValAssign &VA = ArgLocs[i];
2623  EVT RegVT = VA.getLocVT();
2624  SDValue Arg = OutVals[realArgIdx];
2625  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2626  if (VA.getLocInfo() == CCValAssign::Indirect)
2627  return false;
2628  if (VA.needsCustom()) {
2629  // f64 and vector types are split into multiple registers or
2630  // register/stack-slot combinations. The types will not match
2631  // the registers; give up on memory f64 refs until we figure
2632  // out what to do about this.
2633  if (!VA.isRegLoc())
2634  return false;
2635  if (!ArgLocs[++i].isRegLoc())
2636  return false;
2637  if (RegVT == MVT::v2f64) {
2638  if (!ArgLocs[++i].isRegLoc())
2639  return false;
2640  if (!ArgLocs[++i].isRegLoc())
2641  return false;
2642  }
2643  } else if (!VA.isRegLoc()) {
2644  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2645  MFI, MRI, TII))
2646  return false;
2647  }
2648  }
2649  }
2650 
2651  const MachineRegisterInfo &MRI = MF.getRegInfo();
2652  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2653  return false;
2654  }
2655 
2656  return true;
2657 }
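// Illustrative example (not from this file): a call this predicate typically
// accepts as a sibcall, in the spirit of the byval example above:
//   define i32 @f(i32 %x) {
//     %r = tail call i32 @g(i32 %x)   ; args fit in r0-r3, no sret, same CC
//     ret i32 %r
//   }
// It is rejected, even when marked 'tail', if the caller carries the
// "interrupt" attribute or the callee has external weak linkage on ELF.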
2658 
2659 bool
2660 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2661  MachineFunction &MF, bool isVarArg,
2662  const SmallVectorImpl<ISD::OutputArg> &Outs,
2663  LLVMContext &Context) const {
2665  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2666  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2667 }
2668 
2669 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2670  const SDLoc &DL, SelectionDAG &DAG) {
2671  const MachineFunction &MF = DAG.getMachineFunction();
2672  const Function &F = MF.getFunction();
2673 
2674  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2675 
2676  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2677  // version of the "preferred return address". These offsets affect the return
2678  // instruction if this is a return from PL1 without hypervisor extensions.
2679  // IRQ/FIQ: +4 "subs pc, lr, #4"
2680  // SWI: 0 "subs pc, lr, #0"
2681  // ABORT: +4 "subs pc, lr, #4"
2682  // UNDEF: +4/+2 "subs pc, lr, #0"
2683  // UNDEF varies depending on whether the exception came from ARM or Thumb
2684  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2685 
2686  int64_t LROffset;
2687  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2688  IntKind == "ABORT")
2689  LROffset = 4;
2690  else if (IntKind == "SWI" || IntKind == "UNDEF")
2691  LROffset = 0;
2692  else
2693  report_fatal_error("Unsupported interrupt attribute. If present, value "
2694  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2695 
2696  RetOps.insert(RetOps.begin() + 1,
2697  DAG.getConstant(LROffset, DL, MVT::i32, false));
2698 
2699  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2700 }
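// Illustrative example (not from this file): a handler built with the
// "interrupt"="IRQ" function attribute returns with "subs pc, lr, #4"
// (LROffset == 4 above), restoring pc and the pre-exception mode in one
// instruction, while an "SWI" or "UNDEF" handler uses an offset of 0.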
2701 
2702 SDValue
2703 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2704  bool isVarArg,
2705  const SmallVectorImpl<ISD::OutputArg> &Outs,
2706  const SmallVectorImpl<SDValue> &OutVals,
2707  const SDLoc &dl, SelectionDAG &DAG) const {
2708  // CCValAssign - represent the assignment of the return value to a location.
2710 
2711  // CCState - Info about the registers and stack slots.
2712  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2713  *DAG.getContext());
2714 
2715  // Analyze outgoing return values.
2716  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2717 
2718  SDValue Flag;
2719  SmallVector<SDValue, 4> RetOps;
2720  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2721  bool isLittleEndian = Subtarget->isLittle();
2722 
2723  MachineFunction &MF = DAG.getMachineFunction();
2725  AFI->setReturnRegsCount(RVLocs.size());
2726 
2727  // Copy the result values into the output registers.
2728  for (unsigned i = 0, realRVLocIdx = 0;
2729  i != RVLocs.size();
2730  ++i, ++realRVLocIdx) {
2731  CCValAssign &VA = RVLocs[i];
2732  assert(VA.isRegLoc() && "Can only return in registers!");
2733 
2734  SDValue Arg = OutVals[realRVLocIdx];
2735  bool ReturnF16 = false;
2736 
2737  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2738  // Half-precision return values can be returned like this:
2739  //
2740  // t11: f16 = fadd ...
2741  // t12: i16 = bitcast t11
2742  // t13: i32 = zero_extend t12
2743  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2744  //
2745  // to avoid code generation for bitcasts, we simply set Arg to the node
2746  // that produces the f16 value, t11 in this case.
2747  //
2748  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2749  SDValue ZE = Arg.getOperand(0);
2750  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2751  SDValue BC = ZE.getOperand(0);
2752  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2753  Arg = BC.getOperand(0);
2754  ReturnF16 = true;
2755  }
2756  }
2757  }
2758  }
2759 
2760  switch (VA.getLocInfo()) {
2761  default: llvm_unreachable("Unknown loc info!");
2762  case CCValAssign::Full: break;
2763  case CCValAssign::BCvt:
2764  if (!ReturnF16)
2765  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2766  break;
2767  }
2768 
2769  if (VA.needsCustom()) {
2770  if (VA.getLocVT() == MVT::v2f64) {
2771  // Extract the first half and return it in two registers.
2772  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2773  DAG.getConstant(0, dl, MVT::i32));
2774  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2775  DAG.getVTList(MVT::i32, MVT::i32), Half);
2776 
2777  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2778  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2779  Flag);
2780  Flag = Chain.getValue(1);
2781  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2782  VA = RVLocs[++i]; // skip ahead to next loc
2783  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2784  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2785  Flag);
2786  Flag = Chain.getValue(1);
2787  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2788  VA = RVLocs[++i]; // skip ahead to next loc
2789 
2790  // Extract the 2nd half and fall through to handle it as an f64 value.
2791  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2792  DAG.getConstant(1, dl, MVT::i32));
2793  }
2794  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2795  // available.
2796  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2797  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2798  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2799  fmrrd.getValue(isLittleEndian ? 0 : 1),
2800  Flag);
2801  Flag = Chain.getValue(1);
2802  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2803  VA = RVLocs[++i]; // skip ahead to next loc
2804  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2805  fmrrd.getValue(isLittleEndian ? 1 : 0),
2806  Flag);
2807  } else
2808  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2809 
2810  // Guarantee that all emitted copies are
2811  // stuck together, avoiding something bad.
2812  Flag = Chain.getValue(1);
2813  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2814  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2815  }
2816  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2817  const MCPhysReg *I =
2819  if (I) {
2820  for (; *I; ++I) {
2821  if (ARM::GPRRegClass.contains(*I))
2822  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2823  else if (ARM::DPRRegClass.contains(*I))
2824  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2825  else
2826  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2827  }
2828  }
2829 
2830  // Update chain and glue.
2831  RetOps[0] = Chain;
2832  if (Flag.getNode())
2833  RetOps.push_back(Flag);
2834 
2835  // CPUs which aren't M-class use a special sequence to return from
2836  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2837  // though we use "subs pc, lr, #N").
2838  //
2839  // M-class CPUs actually use a normal return sequence with a special
2840  // (hardware-provided) value in LR, so the normal code path works.
2841  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2842  !Subtarget->isMClass()) {
2843  if (Subtarget->isThumb1Only())
2844  report_fatal_error("interrupt attribute is not supported in Thumb1");
2845  return LowerInterruptReturn(RetOps, dl, DAG);
2846  }
2847 
2848  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2849 }
2850 
2851 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2852  if (N->getNumValues() != 1)
2853  return false;
2854  if (!N->hasNUsesOfValue(1, 0))
2855  return false;
2856 
2857  SDValue TCChain = Chain;
2858  SDNode *Copy = *N->use_begin();
2859  if (Copy->getOpcode() == ISD::CopyToReg) {
2860  // If the copy has a glue operand, we conservatively assume it isn't safe to
2861  // perform a tail call.
2862  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2863  return false;
2864  TCChain = Copy->getOperand(0);
2865  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2866  SDNode *VMov = Copy;
2867  // f64 returned in a pair of GPRs.
2869  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2870  UI != UE; ++UI) {
2871  if (UI->getOpcode() != ISD::CopyToReg)
2872  return false;
2873  Copies.insert(*UI);
2874  }
2875  if (Copies.size() > 2)
2876  return false;
2877 
2878  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2879  UI != UE; ++UI) {
2880  SDValue UseChain = UI->getOperand(0);
2881  if (Copies.count(UseChain.getNode()))
2882  // Second CopyToReg
2883  Copy = *UI;
2884  else {
2885  // We are at the top of this chain.
2886  // If the copy has a glue operand, we conservatively assume it
2887  // isn't safe to perform a tail call.
2888  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2889  return false;
2890  // First CopyToReg
2891  TCChain = UseChain;
2892  }
2893  }
2894  } else if (Copy->getOpcode() == ISD::BITCAST) {
2895  // f32 returned in a single GPR.
2896  if (!Copy->hasOneUse())
2897  return false;
2898  Copy = *Copy->use_begin();
2899  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2900  return false;
2901  // If the copy has a glue operand, we conservatively assume it isn't safe to
2902  // perform a tail call.
2903  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2904  return false;
2905  TCChain = Copy->getOperand(0);
2906  } else {
2907  return false;
2908  }
2909 
2910  bool HasRet = false;
2911  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2912  UI != UE; ++UI) {
2913  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2914  UI->getOpcode() != ARMISD::INTRET_FLAG)
2915  return false;
2916  HasRet = true;
2917  }
2918 
2919  if (!HasRet)
2920  return false;
2921 
2922  Chain = TCChain;
2923  return true;
2924 }
2925 
2926 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2927  if (!Subtarget->supportsTailCall())
2928  return false;
2929 
2930  auto Attr =
2931  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2932  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2933  return false;
2934 
2935  return true;
2936 }
2937 
2938 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2939 // values first and pass the low and high parts through.
2940 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2941  SDLoc DL(Op);
2942  SDValue WriteValue = Op->getOperand(2);
2943 
2944  // This function is only supposed to be called for i64 type argument.
2945  assert(WriteValue.getValueType() == MVT::i64
2946  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2947 
2948  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2949  DAG.getConstant(0, DL, MVT::i32));
2950  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2951  DAG.getConstant(1, DL, MVT::i32));
2952  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2953  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2954 }
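// Illustrative example (not from this file): the kind of node this handles.
// An i64 write such as
//   call void @llvm.write_register.i64(metadata !"<64-bit reg>", i64 %v)
// becomes a WRITE_REGISTER whose value operands are the two i32 halves
// (lo, hi) extracted above; the register name is left symbolic here.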
2955 
2956 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2957 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2958 // one of the above mentioned nodes. It has to be wrapped because otherwise
2959 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2960 // be used to form addressing modes. These wrapped nodes will be selected
2961 // into MOVi.
2962 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2963  SelectionDAG &DAG) const {
2964  EVT PtrVT = Op.getValueType();
2965  // FIXME there is no actual debug info here
2966  SDLoc dl(Op);
2967  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2968  SDValue Res;
2969 
2970  // When generating execute-only code Constant Pools must be promoted to the
2971  // global data section. It's a bit ugly that we can't share them across basic
2972 // blocks, but this way we guarantee that execute-only behaves correctly with
2973  // position-independent addressing modes.
2974  if (Subtarget->genExecuteOnly()) {
2975  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2976  auto T = const_cast<Type*>(CP->getType());
2977  auto C = const_cast<Constant*>(CP->getConstVal());
2978  auto M = const_cast<Module*>(DAG.getMachineFunction().
2979  getFunction().getParent());
2980  auto GV = new GlobalVariable(
2981  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
2982  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2983  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2984  Twine(AFI->createPICLabelUId())
2985  );
2986  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2987  dl, PtrVT);
2988  return LowerGlobalAddress(GA, DAG);
2989  }
2990 
2991  if (CP->isMachineConstantPoolEntry())
2992  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2993  CP->getAlignment());
2994  else
2995  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2996  CP->getAlignment());
2997  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2998 }
2999 
3002 }
3003 
3004 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3005  SelectionDAG &DAG) const {
3006  MachineFunction &MF = DAG.getMachineFunction();
3008  unsigned ARMPCLabelIndex = 0;
3009  SDLoc DL(Op);
3010  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3011  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3012  SDValue CPAddr;
3013  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3014  if (!IsPositionIndependent) {
3015  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
3016  } else {
3017  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3018  ARMPCLabelIndex = AFI->createPICLabelUId();
3019  ARMConstantPoolValue *CPV =
3020  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3021  ARMCP::CPBlockAddress, PCAdj);
3022  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3023  }
3024  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3025  SDValue Result = DAG.getLoad(
3026  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3028  if (!IsPositionIndependent)
3029  return Result;
3030  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3031  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3032 }
3033 
3034 /// Convert a TLS address reference into the correct sequence of loads
3035 /// and calls to compute the variable's address for Darwin, and return an
3036 /// SDValue containing the final node.
3037 
3038 /// Darwin only has one TLS scheme which must be capable of dealing with the
3039 /// fully general situation, in the worst case. This means:
3040 /// + "extern __thread" declaration.
3041 /// + Defined in a possibly unknown dynamic library.
3042 ///
3043 /// The general system is that each __thread variable has a [3 x i32] descriptor
3044 /// which contains information used by the runtime to calculate the address. The
3045 /// only part of this the compiler needs to know about is the first word, which
3046 /// contains a function pointer that must be called with the address of the
3047 /// entire descriptor in "r0".
3048 ///
3049 /// Since this descriptor may be in a different unit, in general access must
3050 /// proceed along the usual ARM rules. A common sequence to produce is:
3051 ///
3052 /// movw rT1, :lower16:_var$non_lazy_ptr
3053 /// movt rT1, :upper16:_var$non_lazy_ptr
3054 /// ldr r0, [rT1]
3055 /// ldr rT2, [r0]
3056 /// blx rT2
3057 /// [...address now in r0...]
3058 SDValue
3059 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3060  SelectionDAG &DAG) const {
3061  assert(Subtarget->isTargetDarwin() &&
3062  "This function expects a Darwin target");
3063  SDLoc DL(Op);
3064 
3065  // The first step is to get the address of the actual global symbol. This is where
3066  // the TLS descriptor lives.
3067  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3068 
3069  // The first entry in the descriptor is a function pointer that we must call
3070  // to obtain the address of the variable.
3071  SDValue Chain = DAG.getEntryNode();
3072  SDValue FuncTLVGet = DAG.getLoad(
3073  MVT::i32, DL, Chain, DescAddr,
3075  /* Alignment = */ 4,
3078  Chain = FuncTLVGet.getValue(1);
3079 
3081  MachineFrameInfo &MFI = F.getFrameInfo();
3082  MFI.setAdjustsStack(true);
3083 
3084  // TLS calls preserve all registers except those that absolutely must be
3085  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3086  // silly).
3087  auto TRI =
3088  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3089  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3090  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3091 
3092  // Finally, we can make the call. This is just a degenerate version of a
3093  // normal ARM call node: r0 takes the address of the descriptor, and
3094  // returns the address of the variable in this thread.
3095  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3096  Chain =
3098  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3099  DAG.getRegisterMask(Mask), Chain.getValue(1));
3100  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3101 }
3102 
3103 SDValue
3104 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3105  SelectionDAG &DAG) const {
3106  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3107 
3108  SDValue Chain = DAG.getEntryNode();
3109  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3110  SDLoc DL(Op);
3111 
3112  // Load the current TEB (thread environment block)
3113  SDValue Ops[] = {Chain,
3114  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3115  DAG.getTargetConstant(15, DL, MVT::i32),
3116  DAG.getTargetConstant(0, DL, MVT::i32),
3117  DAG.getTargetConstant(13, DL, MVT::i32),
3118  DAG.getTargetConstant(0, DL, MVT::i32),
3119  DAG.getTargetConstant(2, DL, MVT::i32)};
3120  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3121  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3122 
3123  SDValue TEB = CurrentTEB.getValue(0);
3124  Chain = CurrentTEB.getValue(1);
3125 
3126  // Load the ThreadLocalStoragePointer from the TEB
3127  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3128  SDValue TLSArray =
3129  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3130  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3131 
3132  // The pointer to the thread's TLS data area is found in the TLSArray at the
3133  // offset given by the TLS index scaled by 4.
3134 
3135  // Load the TLS index from the C runtime
3136  SDValue TLSIndex =
3137  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3138  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3139  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3140 
3141  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3142  DAG.getConstant(2, DL, MVT::i32));
3143  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3144  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3145  MachinePointerInfo());
3146 
3147  // Get the offset of the start of the .tls section (section base)
3148  const auto *GA = cast<GlobalAddressSDNode>(Op);
3149  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3150  SDValue Offset = DAG.getLoad(
3151  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3152  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
3154 
3155  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3156 }
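// Illustrative example (not from this file): the Windows TLS address computed
// above is, in effect,
//   TEB      = MRC p15, 0, <Rt>, c13, c0, 2     ; thread environment block
//   TLSArray = *(TEB + 0x2c)
//   TLSBase  = TLSArray[_tls_index]             ; the SHL-by-2 plus load
//   &var     = TLSBase + SECREL(var)            ; constant-pool SECREL offset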
3157 
3158 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3159 SDValue
3160 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3161  SelectionDAG &DAG) const {
3162  SDLoc dl(GA);
3163  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3164  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3165  MachineFunction &MF = DAG.getMachineFunction();
3167  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3168  ARMConstantPoolValue *CPV =
3169  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3170  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3171  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3172  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3173  Argument = DAG.getLoad(
3174  PtrVT, dl, DAG.getEntryNode(), Argument,
3176  SDValue Chain = Argument.getValue(1);
3177 
3178  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3179  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3180 
3181  // call __tls_get_addr.
3182  ArgListTy Args;
3184  Entry.Node = Argument;
3185  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3186  Args.push_back(Entry);
3187 
3188  // FIXME: is there useful debug info available here?
3190  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3192  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3193 
3194  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3195  return CallResult.first;
3196 }
3197 
3198 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3199 // "local exec" model.
3200 SDValue
3201 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3202  SelectionDAG &DAG,
3203  TLSModel::Model model) const {
3204  const GlobalValue *GV = GA->getGlobal();
3205  SDLoc dl(GA);
3206  SDValue Offset;
3207  SDValue Chain = DAG.getEntryNode();
3208  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3209  // Get the Thread Pointer
3211 
3212  if (model == TLSModel::InitialExec) {
3213  MachineFunction &MF = DAG.getMachineFunction();
3215  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3216  // Initial exec model.
3217  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3218  ARMConstantPoolValue *CPV =
3219  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3221  true);
3222  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3223  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3224  Offset = DAG.getLoad(
3225  PtrVT, dl, Chain, Offset,
3227  Chain = Offset.getValue(1);
3228 
3229  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3230  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3231 
3232  Offset = DAG.getLoad(
3233  PtrVT, dl, Chain, Offset,
3235  } else {
3236  // local exec model
3237  assert(model == TLSModel::LocalExec);
3238  ARMConstantPoolValue *CPV =
3240  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3241  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3242  Offset = DAG.getLoad(
3243  PtrVT, dl, Chain, Offset,
3245  }
3246 
3247  // The address of the thread local variable is the sum of the thread
3248  // pointer and the offset of the variable.
3249  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3250 }
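// Note (not from this file): in both exec models the result is
// ThreadPointer + Offset. Initial-exec reaches the TPOFF value through an
// extra GOT-style indirection (the second load above), while local-exec
// materializes the TPOFF constant directly from the constant pool.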
3251 
3252 SDValue
3253 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3254  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3255  if (DAG.getTarget().useEmulatedTLS())
3256  return LowerToTLSEmulatedModel(GA, DAG);
3257 
3258  if (Subtarget->isTargetDarwin())
3259  return LowerGlobalTLSAddressDarwin(Op, DAG);
3260 
3261  if (Subtarget->isTargetWindows())
3262  return LowerGlobalTLSAddressWindows(Op, DAG);
3263 
3264  // TODO: implement the "local dynamic" model
3265  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3267 
3268  switch (model) {
3271  return LowerToTLSGeneralDynamicModel(GA, DAG);
3272  case TLSModel::InitialExec:
3273  case TLSModel::LocalExec:
3274  return LowerToTLSExecModels(GA, DAG, model);
3275  }
3276  llvm_unreachable("bogus TLS model");
3277 }
3278 
3279 /// Return true if all users of V are within function F, looking through
3280 /// ConstantExprs.
3281 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3282  SmallVector<const User*,4> Worklist;
3283  for (auto *U : V->users())
3284  Worklist.push_back(U);
3285  while (!Worklist.empty()) {
3286  auto *U = Worklist.pop_back_val();
3287  if (isa<ConstantExpr>(U)) {
3288  for (auto *UU : U->users())
3289  Worklist.push_back(UU);
3290  continue;
3291  }
3292 
3293  auto *I = dyn_cast<Instruction>(U);
3294  if (!I || I->getParent()->getParent() != F)
3295  return false;
3296  }
3297  return true;
3298 }
3299 
3300 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3301  const GlobalValue *GV, SelectionDAG &DAG,
3302  EVT PtrVT, const SDLoc &dl) {
3303  // If we're creating a pool entry for a constant global with unnamed address,
3304  // and the global is small enough, we can emit it inline into the constant pool
3305  // to save ourselves an indirection.
3306  //
3307  // This is a win if the constant is only used in one function (so it doesn't
3308  // need to be duplicated) or duplicating the constant wouldn't increase code
3309  // size (implying the constant is no larger than 4 bytes).
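 // For example, a 4-byte constant with internal linkage that is only read from
 // one function can be emitted straight into that function's constant pool:
 // the code then performs a single literal-pool load of the value instead of
 // loading the global's address first and dereferencing it.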
3310  const Function &F = DAG.getMachineFunction().getFunction();
3311 
3312  // We rely on this decision to inline being idempotent and unrelated to the
3313  // use-site. We know that if we inline a variable at one use site, we'll
3314  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3315  // doesn't know about this optimization, so bail out if it's enabled;
3316  // otherwise we could decide to inline here (and thus never emit the GV)
3317  // while fast-isel generated code still requires the GV.
3318  if (!EnableConstpoolPromotion ||
3319  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3320  return SDValue();
3321 
3322  auto *GVar = dyn_cast<GlobalVariable>(GV);
3323  if (!GVar || !GVar->hasInitializer() ||
3324  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3325  !GVar->hasLocalLinkage())
3326  return SDValue();
3327 
3328  // If we inline a value that contains relocations, we move the relocations
3329  // from .data to .text. This is not allowed in position-independent code.
3330  auto *Init = GVar->getInitializer();
3331  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3332  Init->needsRelocation())
3333  return SDValue();
3334 
3335  // The constant islands pass can only really deal with alignment requests
3336  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3337  // any type wanting greater alignment requirements than 4 bytes. We also
3338  // can only promote constants that are multiples of 4 bytes in size or
3339  // are paddable to a multiple of 4. Currently we only try and pad constants
3340  // that are strings for simplicity.
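 // For example, a 6-byte string initializer gives RequiredPadding == 2 below
 // and is extended with two zero bytes to an 8-byte pool entry, while a
 // 6-byte non-string constant is rejected by the checks below.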
3341  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3342  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3343  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3344  unsigned RequiredPadding = 4 - (Size % 4);
3345  bool PaddingPossible =
3346  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3347  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3348  Size == 0)
3349  return SDValue();
3350 
3351  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3352  MachineFunction &MF = DAG.getMachineFunction();
3353  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3354 
3355  // We can't bloat the constant pool too much, else the ConstantIslands pass
3356  // may fail to converge. If we haven't promoted this global yet (it may have
3357  // multiple uses), and promoting it would increase the constant pool size (Sz
3358  // > 4), ensure we have space to do so up to MaxTotal.
3359  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3360  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3361  ConstpoolPromotionMaxTotal)
3362  return SDValue();
3363 
3364  // This is only valid if all users are in a single function; we can't clone
3365  // the constant in general. The LLVM IR unnamed_addr allows merging
3366  // constants, but not cloning them.
3367  //
3368  // We could potentially allow cloning if we could prove all uses of the
3369  // constant in the current function don't care about the address, like
3370  // printf format strings. But that isn't implemented for now.
3371  if (!allUsersAreInFunction(GVar, &F))
3372  return SDValue();
3373 
3374  // We're going to inline this global. Pad it out if needed.
3375  if (RequiredPadding != 4) {
3376  StringRef S = CDAInit->getAsString();
3377 
3378  SmallVector<uint8_t, 16> V(S.size());
3379  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3380  while (RequiredPadding--)
3381  V.push_back(0);
3382  Init = ConstantDataArray::get(*DAG.getContext(), V);
3383  }
3384 
3385  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3386  SDValue CPAddr =
3387  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3388  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3389  AFI->markGlobalAsPromotedToConstantPool(GVar);
3390  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3391  PaddedSize - 4);
3392  }
3393  ++NumConstpoolPromoted;
3394  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3395 }
3396 
3397 static bool isReadOnly(const GlobalValue *GV) {
3398  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3399  if (!(GV = GA->getBaseObject()))
3400  return false;
3401  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3402  return V->isConstant();
3403  return isa<Function>(GV);
3404 }
3405 
3406 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3407  SelectionDAG &DAG) const {
3408  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3409  default: llvm_unreachable("unknown object format");
3410  case Triple::COFF:
3411  return LowerGlobalAddressWindows(Op, DAG);
3412  case Triple::ELF:
3413  return LowerGlobalAddressELF(Op, DAG);
3414  case Triple::MachO:
3415  return LowerGlobalAddressDarwin(Op, DAG);
3416  }
3417 }
3418 
3419 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3420  SelectionDAG &DAG) const {
3421  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3422  SDLoc dl(Op);
3423  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3424  const TargetMachine &TM = getTargetMachine();
3425  bool IsRO = isReadOnly(GV);
3426 
3427  // promoteToConstantPool only if not generating XO text section
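 // (Execute-only text sections may not be read as data, so an inlined
 // constant-pool entry placed there could not be loaded from.)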
3428  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3429  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3430  return V;
3431 
3432  if (isPositionIndependent()) {
3433  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3434  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3435  UseGOT_PREL ? ARMII::MO_GOT : 0);
3436  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3437  if (UseGOT_PREL)
3438  Result =
3439  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3440  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3441  return Result;
3442  } else if (Subtarget->isROPI() && IsRO) {
3443  // PC-relative.
3444  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3445  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3446  return Result;
3447  } else if (Subtarget->isRWPI() && !IsRO) {
3448  // SB-relative.
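 // RWPI addresses read-write data as an offset from the static base register
 // (R9, read below via CopyFromReg), so the generated code is independent of
 // where the RW data region is placed at run time.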
3449  SDValue RelAddr;
3450  if (Subtarget->useMovt()) {
3451  ++NumMovwMovt;
3452  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3453  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3454  } else { // use literal pool for address constant
3455  ARMConstantPoolValue *CPV =
3456  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3457  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3458  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3459  RelAddr = DAG.getLoad(
3460  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3461  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3462  }
3463  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3464  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3465  return Result;
3466  }
3467 
3468  // If we have T2 ops, we can materialize the address directly via movt/movw
3469  // pair. This is always cheaper.
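 // With movw/movt the address is built in two instructions, roughly:
 //   movw r0, :lower16:sym
 //   movt r0, :upper16:sym
 // rather than being loaded from a literal pool entry.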
3470  if (Subtarget->useMovt()) {
3471  ++NumMovwMovt;
3472  // FIXME: Once remat is capable of dealing with instructions with register
3473  // operands, expand this into two nodes.
3474  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3475  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3476  } else {
3477  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3478  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3479  return DAG.getLoad(
3480  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3481  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3482  }
3483 }
3484 
3485 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3486  SelectionDAG &DAG) const {
3487  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3488  "ROPI/RWPI not currently supported for Darwin");
3489  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3490  SDLoc dl(Op);
3491  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3492 
3493  if (Subtarget->useMovt())
3494  ++NumMovwMovt;
3495 
3496  // FIXME: Once remat is capable of dealing with instructions with register
3497  // operands, expand this into multiple nodes
3498  unsigned Wrapper =
3499  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3500 
3501  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3502  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3503 
3504  if (Subtarget->isGVIndirectSymbol(GV))
3505  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3506  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3507  return Result;
3508 }
3509 
3510 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3511  SelectionDAG &DAG) const {
3512  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3513  assert(Subtarget->useMovt() &&
3514  "Windows on ARM expects to use movw/movt");
3515  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3516  "ROPI/RWPI not currently supported for Windows");
3517 
3518  const TargetMachine &TM = getTargetMachine();
3519  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3520  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3521  if (GV->hasDLLImportStorageClass())
3522  TargetFlags = ARMII::MO_DLLIMPORT;
3523  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3524  TargetFlags = ARMII::MO_COFFSTUB;
3525  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3526  SDValue Result;
3527  SDLoc DL(Op);
3528 
3529  ++NumMovwMovt;
3530 
3531  // FIXME: Once remat is capable of dealing with instructions with register
3532  // operands, expand this into two nodes.
3533  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3534  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3535  TargetFlags));
3536  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3537  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3538  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3539  return Result;
3540 }
3541 
3542 SDValue
3543 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3544  SDLoc dl(Op);
3545  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3546  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3547  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3548  Op.getOperand(1), Val);
3549 }
3550 
3551 SDValue
3552 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3553  SDLoc dl(Op);
3554  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3555  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3556 }
3557 
3558 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3559  SelectionDAG &DAG) const {
3560  SDLoc dl(Op);
3561  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3562  Op.getOperand(0));
3563 }
3564 
3565 SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3566  SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
3567  unsigned IntNo =
3568  cast<ConstantSDNode>(
3569  Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other ? 1 : 0))
3570  ->getZExtValue();
3571  switch (IntNo) {
3572  default:
3573  return SDValue(); // Don't custom lower most intrinsics.
3574  case Intrinsic::arm_gnu_eabi_mcount: {
3575  MachineFunction &MF = DAG.getMachineFunction();
3576  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3577  SDLoc dl(Op);
3578  SDValue Chain = Op.getOperand(0);
3579  // call "\01__gnu_mcount_nc"
3580  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3581  const uint32_t *Mask =
3582  ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3583  assert(Mask && "Missing call preserved mask for calling convention");
3584  // Mark LR an implicit live-in.
3585  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3586  SDValue ReturnAddress =
3587  DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3588  std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
3589  SDValue Callee =
3590  DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
3591  SDValue RegisterMask = DAG.getRegisterMask(Mask);
3592  if (Subtarget->isThumb())
3593  return SDValue(
3594  DAG.getMachineNode(
3595  ARM::tBL_PUSHLR, dl, ResultTys,
3596  {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3597  DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3598  0);
3599  return SDValue(
3600  DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
3601  {ReturnAddress, Callee, RegisterMask, Chain}),
3602  0);
3603  }
3604  }
3605 }
3606 
3607 SDValue
3608 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3609  const ARMSubtarget *Subtarget) const {
3610  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3611  SDLoc dl(Op);
3612  switch (IntNo) {
3613  default: return SDValue(); // Don't custom lower most intrinsics.
3614  case Intrinsic::thread_pointer: {
3615  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3616  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3617  }
3618  case Intrinsic::eh_sjlj_lsda: {
3619  MachineFunction &MF = DAG.getMachineFunction();
3620  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3621  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3622  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3623  SDValue CPAddr;
3624  bool IsPositionIndependent = isPositionIndependent();
3625  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3626  ARMConstantPoolValue *CPV =
3627  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3628  ARMCP::CPLSDA, PCAdj);
3629  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3630  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3631  SDValue Result = DAG.getLoad(
3632  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3633  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3634 
3635  if (IsPositionIndependent) {
3636  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3637  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3638  }
3639  return Result;
3640  }
3641  case Intrinsic::arm_neon_vabs:
3642  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3643  Op.getOperand(1));
3644  case Intrinsic::arm_neon_vmulls:
3645  case Intrinsic::arm_neon_vmullu: {
3646  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3647  ? ARMISD::VMULLs : ARMISD::VMULLu;
3648  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3649  Op.getOperand(1), Op.getOperand(2));
3650  }
3651  case Intrinsic::arm_neon_vminnm:
3652  case Intrinsic::arm_neon_vmaxnm: {
3653  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3654  ? ISD::FMINNUM : ISD::FMAXNUM;
3655  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3656  Op.getOperand(1), Op.getOperand(2));
3657  }
3658  case Intrinsic::arm_neon_vminu:
3659  case Intrinsic::arm_neon_vmaxu: {
3660  if (Op.getValueType().isFloatingPoint())
3661  return SDValue();
3662  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3663  ? ISD::UMIN : ISD::UMAX;
3664  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3665  Op.getOperand(1), Op.getOperand(2));
3666  }
3667  case Intrinsic::arm_neon_vmins:
3668  case Intrinsic::arm_neon_vmaxs: {
3669  // v{min,max}s is overloaded between signed integers and floats.
3670  if (!Op.getValueType().isFloatingPoint()) {
3671  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3672  ? ISD::SMIN : ISD::SMAX;
3673  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3674  Op.getOperand(1), Op.getOperand(2));
3675  }
3676  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3677  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3678  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3679  Op.getOperand(1), Op.getOperand(2));
3680  }
3681  case Intrinsic::arm_neon_vtbl1:
3682  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3683  Op.getOperand(1), Op.getOperand(2));
3684  case Intrinsic::arm_neon_vtbl2:
3685  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3686  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3687  }
3688 }
3689 
3690 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3691  const ARMSubtarget *Subtarget) {
3692  SDLoc dl(Op);
3693  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3694  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3695  if (SSID == SyncScope::SingleThread)
3696  return Op;
3697 
3698  if (!Subtarget->hasDataBarrier()) {
3699  // Some ARMv6 CPUs can support data barriers with an mcr instruction.
3700  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3701  // here.
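 // (ARMISD::MEMBARRIER_MCR corresponds to the ARMv6 CP15 data barrier,
 // roughly "mcr p15, 0, r0, c7, c10, 5"; the zero operand is created below.)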
3702  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3703  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3704  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3705  DAG.getConstant(0, dl, MVT::i32));
3706  }
3707 
3708  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3709  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3710  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3711  if (Subtarget->isMClass()) {
3712  // Only a full system barrier exists in the M-class architectures.
3713  Domain = ARM_MB::SY;
3714  } else if (Subtarget->preferISHSTBarriers() &&
3715  Ord == AtomicOrdering::Release) {
3716  // Swift happens to implement ISHST barriers in a way that's compatible with
3717  // Release semantics but weaker than ISH so we'd be fools not to use
3718  // it. Beware: other processors probably don't!
3719  Domain = ARM_MB::ISHST;
3720  }
3721 
3722  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3723  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3724  DAG.getConstant(Domain, dl, MVT::i32));
3725 }
3726 
3727 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3728  const ARMSubtarget *Subtarget) {
3729  // ARM pre-v5TE and Thumb1 do not have preload instructions.
3730  if (!(Subtarget->isThumb2() ||
3731  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3732  // Just preserve the chain.
3733  return Op.getOperand(0);
3734 
3735  SDLoc dl(Op);
3736  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3737  if (!isRead &&
3738  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3739  // ARMv7 with MP extension has PLDW.
3740  return Op.getOperand(0);
3741 
3742  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3743  if (Subtarget->isThumb()) {
3744  // Invert the bits.
3745  isRead = ~isRead & 1;
3746  isData = ~isData & 1;
3747  }
3748 
3749  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3750  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3751  DAG.getConstant(isData, dl, MVT::i32));
3752 }
3753 
3754 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3755  MachineFunction &MF = DAG.getMachineFunction();
3756  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3757 
3758  // vastart just stores the address of the VarArgsFrameIndex slot into the
3759  // memory location argument.
3760  SDLoc dl(Op);
3761  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3762  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3763  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3764  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3765  MachinePointerInfo(SV));
3766 }
3767 
3768 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3769  CCValAssign &NextVA,
3770  SDValue &Root,
3771  SelectionDAG &DAG,
3772  const SDLoc &dl) const {
3773  MachineFunction &MF = DAG.getMachineFunction();
3774  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3775 
3776  const TargetRegisterClass *RC;
3777  if (AFI->isThumb1OnlyFunction())
3778  RC = &ARM::tGPRRegClass;
3779  else
3780  RC = &ARM::GPRRegClass;
3781 
3782  // Transform the arguments stored in physical registers into virtual ones.
3783  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3784  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3785 
3786  SDValue ArgValue2;
3787  if (NextVA.isMemLoc()) {
3788  MachineFrameInfo &MFI = MF.getFrameInfo();
3789  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3790 
3791  // Create load node to retrieve arguments from the stack.
3792  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3793  ArgValue2 = DAG.getLoad(
3794  MVT::i32, dl, Root, FIN,
3795  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3796  } else {
3797  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3798  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3799  }
3800  if (!Subtarget->isLittle())
3801  std::swap (ArgValue, ArgValue2);
3802  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3803 }
3804 
3805 // The remaining GPRs hold either the beginning of variable-argument
3806 // data, or the beginning of an aggregate passed by value (usually
3807 // byval). Either way, we allocate stack slots adjacent to the data
3808 // provided by our caller, and store the unallocated registers there.
3809 // If this is a variadic function, the va_list pointer will begin with
3810 // these values; otherwise, this reassembles a (byval) structure that
3811 // was split between registers and memory.
3812 // Return: the frame index that the registers were stored into.
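// For example, if a byval aggregate was split so that its first eight bytes
// arrived in r2 and r3 and the remainder on the caller's stack, r2 and r3 are
// stored into stack slots immediately below that memory, so the whole
// aggregate becomes contiguous again and is addressable via one frame index.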
3813 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3814  const SDLoc &dl, SDValue &Chain,
3815  const Value *OrigArg,
3816  unsigned InRegsParamRecordIdx,
3817  int ArgOffset, unsigned ArgSize) const {
3818  // Currently, two use-cases are possible:
3819  // Case #1. A non-var-args function where we meet the first byval parameter.
3820  // Set up the first unallocated register as the first byval register;
3821  // eat all the remaining registers
3822  // (these two actions are performed by the HandleByVal method).
3823  // Then, here, we initialize the stack frame with
3824  // "store-reg" instructions.
3825  // Case #2. A var-args function that doesn't contain byval parameters.
3826  // The same: eat all remaining unallocated registers and
3827  // initialize the stack frame.
3828 
3829  MachineFunction &MF = DAG.getMachineFunction();
3830  MachineFrameInfo &MFI = MF.getFrameInfo();
3831  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3832  unsigned RBegin, REnd;
3833  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3834  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3835  } else {
3836  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3837  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3838  REnd = ARM::R4;
3839  }
3840 
3841  if (REnd != RBegin)
3842  ArgOffset = -4 * (ARM::R4 - RBegin);
3843 
3844  auto PtrVT = getPointerTy(DAG.getDataLayout());
3845  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3846  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3847 
3848  SmallVector<SDValue, 4> MemOps;
3849  const TargetRegisterClass *RC =
3850  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3851 
3852  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3853  unsigned VReg = MF.addLiveIn(Reg, RC);
3854  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3855  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3856  MachinePointerInfo(OrigArg, 4 * i));
3857  MemOps.push_back(Store);
3858  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3859  }
3860 
3861  if (!MemOps.empty())
3862  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3863  return FrameIndex;
3864 }
3865 
3866 // Set up the stack frame that the va_list pointer will start from.
3867 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3868  const SDLoc &dl, SDValue &Chain,
3869  unsigned ArgOffset,
3870  unsigned TotalArgRegsSaveSize,
3871  bool ForceMutable) const {
3872  MachineFunction &MF = DAG.getMachineFunction();
3873  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3874 
3875  // Try to store any remaining integer argument regs
3876  // to their spots on the stack so that they may be loaded by dereferencing
3877  // the result of va_next.
3878  // If there are no regs to be stored, just point the address past the last
3879  // argument passed via the stack.
3880  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3881  CCInfo.getInRegsParamsCount(),
3882  CCInfo.getNextStackOffset(),
3883  std::max(4U, TotalArgRegsSaveSize));
3884  AFI->setVarArgsFrameIndex(FrameIndex);
3885 }
3886 
3887 SDValue ARMTargetLowering::LowerFormalArguments(
3888  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3889  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3890  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3891  MachineFunction &MF = DAG.getMachineFunction();
3892  MachineFrameInfo &MFI = MF.getFrameInfo();
3893 
3894  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3895 
3896  // Assign locations to all of the incoming arguments.
3897  SmallVector<CCValAssign, 16> ArgLocs;
3898  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3899  *DAG.getContext());
3900  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3901 
3902  SmallVector<SDValue, 16> ArgValues;
3903  SDValue ArgValue;
3904  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3905  unsigned CurArgIdx = 0;
3906 
3907  // Initially ArgRegsSaveSize is zero.
3908  // Then we increase this value each time we meet a byval parameter.
3909  // We also increase this value in the case of a varargs function.
3910  AFI->setArgRegsSaveSize(0);
3911 
3912  // Calculate the amount of stack space that we need to allocate to store
3913  // byval and variadic arguments that are passed in registers.
3914  // We need to know this before we allocate the first byval or variadic
3915  // argument, as they will be allocated a stack slot below the CFA (Canonical
3916  // Frame Address, the stack pointer at entry to the function).
3917  unsigned ArgRegBegin = ARM::R4;
3918  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3919  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3920  break;
3921 
3922  CCValAssign &VA = ArgLocs[i];
3923  unsigned Index = VA.getValNo();
3924  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3925  if (!Flags.isByVal())
3926  continue;
3927 
3928  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3929  unsigned RBegin, REnd;
3930  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3931  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3932 
3933  CCInfo.nextInRegsParam();
3934  }
3935  CCInfo.rewindByValRegsInfo();
3936 
3937  int lastInsIndex = -1;
3938  if (isVarArg && MFI.hasVAStart()) {
3939  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3940  if (RegIdx != array_lengthof(GPRArgRegs))
3941  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3942  }
3943 
3944  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3945  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3946  auto PtrVT = getPointerTy(DAG.getDataLayout());
3947 
3948  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3949  CCValAssign &VA = ArgLocs[i];
3950  if (Ins[VA.getValNo()].isOrigArg()) {
3951  std::advance(CurOrigArg,
3952  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3953  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3954  }
3955  // Arguments stored in registers.
3956  if (VA.isRegLoc()) {
3957  EVT RegVT = VA.getLocVT();
3958 
3959  if (VA.needsCustom()) {
3960  // f64 and vector types are split up into multiple registers or
3961  // combinations of registers and stack slots.
3962  if (VA.getLocVT() == MVT::v2f64) {
3963  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3964  Chain, DAG, dl);
3965  VA = ArgLocs[++i]; // skip ahead to next loc
3966  SDValue ArgValue2;
3967  if (VA.isMemLoc()) {
3968  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3969  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3970  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3971  MachinePointerInfo::getFixedStack(
3972  DAG.getMachineFunction(), FI));
3973  } else {
3974  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3975  Chain, DAG, dl);
3976  }
3977  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3978  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3979  ArgValue, ArgValue1,
3980  DAG.getIntPtrConstant(0, dl));
3981  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3982  ArgValue, ArgValue2,
3983  DAG.getIntPtrConstant(1, dl));
3984  } else
3985  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3986  } else {
3987  const TargetRegisterClass *RC;
3988 
3989 
3990  if (RegVT == MVT::f16)
3991  RC = &ARM::HPRRegClass;
3992  else if (RegVT == MVT::f32)
3993  RC = &ARM::SPRRegClass;
3994  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3995  RC = &ARM::DPRRegClass;
3996  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3997  RC = &ARM::QPRRegClass;
3998  else if (RegVT == MVT::i32)
3999  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4000  : &ARM::GPRRegClass;
4001  else
4002  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4003 
4004  // Transform the arguments in physical registers into virtual ones.
4005  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4006  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4007 
4008  // If this value is passed in r0 and has the returned attribute (e.g.
4009  // C++ 'structors), record this fact for later use.
4010  if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4011  AFI->setPreservesR0();
4012  }
4013  }
4014 
4015  // If this is an 8 or 16-bit value, it is really passed promoted
4016  // to 32 bits. Insert an assert[sz]ext to capture this, then
4017  // truncate to the right size.
4018  switch (VA.getLocInfo()) {
4019  default: llvm_unreachable("Unknown loc info!");
4020  case CCValAssign::Full: break;
4021  case CCValAssign::BCvt:
4022  ArgValue =