1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that ARM uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARMISelLowering.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMMachineFunctionInfo.h"
21 #include "ARMPerfectShuffle.h"
22 #include "ARMRegisterInfo.h"
23 #include "ARMSelectionDAGInfo.h"
24 #include "ARMSubtarget.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/CallingConv.h"
68 #include "llvm/IR/Constant.h"
69 #include "llvm/IR/Constants.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugLoc.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Function.h"
74 #include "llvm/IR/GlobalAlias.h"
75 #include "llvm/IR/GlobalValue.h"
76 #include "llvm/IR/GlobalVariable.h"
77 #include "llvm/IR/IRBuilder.h"
78 #include "llvm/IR/InlineAsm.h"
79 #include "llvm/IR/Instruction.h"
80 #include "llvm/IR/Instructions.h"
81 #include "llvm/IR/IntrinsicInst.h"
82 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/Module.h"
84 #include "llvm/IR/Type.h"
85 #include "llvm/IR/User.h"
86 #include "llvm/IR/Value.h"
87 #include "llvm/MC/MCInstrDesc.h"
89 #include "llvm/MC/MCRegisterInfo.h"
90 #include "llvm/MC/MCSchedule.h"
93 #include "llvm/Support/Casting.h"
94 #include "llvm/Support/CodeGen.h"
96 #include "llvm/Support/Compiler.h"
97 #include "llvm/Support/Debug.h"
99 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 
117 #define DEBUG_TYPE "arm-isel"
118 
119 STATISTIC(NumTailCalls, "Number of tail calls");
120 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
121 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
122 STATISTIC(NumConstpoolPromoted,
123  "Number of constants with their storage promoted into constant pools");
124 
125 static cl::opt<bool>
126 ARMInterworking("arm-interworking", cl::Hidden,
127  cl::desc("Enable / disable ARM interworking (for debugging only)"),
128  cl::init(true));
129 
130 static cl::opt<bool> EnableConstpoolPromotion(
131  "arm-promote-constant", cl::Hidden,
132  cl::desc("Enable / disable promotion of unnamed_addr constants into "
133  "constant pools"),
134  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
135 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
136  "arm-promote-constant-max-size", cl::Hidden,
137  cl::desc("Maximum size of constant to promote into a constant pool"),
138  cl::init(64));
139 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
140  "arm-promote-constant-max-total", cl::Hidden,
141  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
142  cl::init(128));
143 
144 // The APCS parameter registers.
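 // Under both APCS and AAPCS the first four 32-bit argument words are passed
 // in r0-r3; anything beyond that goes on the stack.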
145 static const MCPhysReg GPRArgRegs[] = {
146  ARM::R0, ARM::R1, ARM::R2, ARM::R3
147 };
148 
149 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
150  MVT PromotedBitwiseVT) {
151  if (VT != PromotedLdStVT) {
153  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
154 
156  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
157  }
158 
159  MVT ElemTy = VT.getVectorElementType();
160  if (ElemTy != MVT::f64)
164  if (ElemTy == MVT::i32) {
169  } else {
174  }
183  if (VT.isInteger()) {
187  }
188 
189  // Promote all bit-wise operations.
190  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
197  }
198 
199  // Neon does not support vector divide/remainder operations.
206 
207  if (!VT.isFloatingPoint() &&
208  VT != MVT::v2i64 && VT != MVT::v1i64)
209  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210  setOperationAction(Opcode, VT, Legal);
211 }
212 
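 // NEON 64-bit vector types (v8i8, v4i16, v2i32, v1i64, v2f32) live in the D
 // registers, while 128-bit types live in D-register pairs; both helpers below
 // register the type with its register class and then let addTypeForNEON set
 // up the shared promotions and operation actions.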
213 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
214  addRegisterClass(VT, &ARM::DPRRegClass);
215  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
216 }
217 
218 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
219  addRegisterClass(VT, &ARM::DPairRegClass);
220  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
221 }
222 
223 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
224  const ARMSubtarget &STI)
225  : TargetLowering(TM), Subtarget(&STI) {
226  RegInfo = Subtarget->getRegisterInfo();
227  Itins = Subtarget->getInstrItineraryData();
228 
231 
232  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
233  !Subtarget->isTargetWatchOS()) {
234  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
235  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
236  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
237  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
238  : CallingConv::ARM_AAPCS);
239  }
240 
241  if (Subtarget->isTargetMachO()) {
242  // Uses VFP for Thumb libfuncs if available.
243  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
244  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
245  static const struct {
246  const RTLIB::Libcall Op;
247  const char * const Name;
248  const ISD::CondCode Cond;
249  } LibraryCalls[] = {
250  // Single-precision floating-point arithmetic.
251  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
252  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
253  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
254  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
255 
256  // Double-precision floating-point arithmetic.
257  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
258  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
259  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
260  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
261 
262  // Single-precision comparisons.
263  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
264  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
265  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
266  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
267  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
268  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
269  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
270  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
271 
272  // Double-precision comparisons.
273  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
274  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
275  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
276  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
277  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
278  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
279  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
280  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
281 
282  // Floating-point to integer conversions.
283  // i64 conversions are done via library routines even when generating VFP
284  // instructions, so use the same ones.
285  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
286  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
287  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
288  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
289 
290  // Conversions between floating types.
291  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
292  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
293 
294  // Integer to floating-point conversions.
295  // i64 conversions are done via library routines even when generating VFP
296  // instructions, so use the same ones.
297  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
298  // e.g., __floatunsidf vs. __floatunssidfvfp.
299  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
300  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
301  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
302  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
303  };
304 
305  for (const auto &LC : LibraryCalls) {
306  setLibcallName(LC.Op, LC.Name);
307  if (LC.Cond != ISD::SETCC_INVALID)
308  setCmpLibcallCC(LC.Op, LC.Cond);
309  }
310  }
311 
312  // Set the correct calling convention for ARMv7k WatchOS. It's just
313  // AAPCS_VFP for functions as simple as libcalls.
314  if (Subtarget->isTargetWatchABI()) {
315  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
316  setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
317  }
318  }
319 
320  // These libcalls are not available in 32-bit.
321  setLibcallName(RTLIB::SHL_I128, nullptr);
322  setLibcallName(RTLIB::SRL_I128, nullptr);
323  setLibcallName(RTLIB::SRA_I128, nullptr);
324 
325  // RTLIB
326  if (Subtarget->isAAPCS_ABI() &&
327  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
328  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
329  static const struct {
330  const RTLIB::Libcall Op;
331  const char * const Name;
332  const CallingConv::ID CC;
333  const ISD::CondCode Cond;
334  } LibraryCalls[] = {
335  // Double-precision floating-point arithmetic helper functions
336  // RTABI chapter 4.1.2, Table 2
337  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
338  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
339  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341 
342  // Double-precision floating-point comparison helper functions
343  // RTABI chapter 4.1.2, Table 3
344  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
345  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
346  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
347  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
348  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
349  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
350  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
351  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
352 
353  // Single-precision floating-point arithmetic helper functions
354  // RTABI chapter 4.1.2, Table 4
355  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
356  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
357  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
358  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359 
360  // Single-precision floating-point comparison helper functions
361  // RTABI chapter 4.1.2, Table 5
362  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
363  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
364  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
365  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
366  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
367  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
368  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
369  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
370 
371  // Floating-point to integer conversions.
372  // RTABI chapter 4.1.2, Table 6
373  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
374  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
375  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
376  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381 
382  // Conversions between floating types.
383  // RTABI chapter 4.1.2, Table 7
384  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387 
388  // Integer to floating-point conversions.
389  // RTABI chapter 4.1.2, Table 8
390  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
391  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
392  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 
399  // Long long helper functions
400  // RTABI chapter 4.2, Table 9
401  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
402  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405 
406  // Integer division functions
407  // RTABI chapter 4.3.1
408  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
409  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
410  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416  };
417 
418  for (const auto &LC : LibraryCalls) {
419  setLibcallName(LC.Op, LC.Name);
420  setLibcallCallingConv(LC.Op, LC.CC);
421  if (LC.Cond != ISD::SETCC_INVALID)
422  setCmpLibcallCC(LC.Op, LC.Cond);
423  }
424 
425  // EABI dependent RTLIB
426  if (TM.Options.EABIVersion == EABI::EABI4 ||
427  TM.Options.EABIVersion == EABI::EABI5) {
428  static const struct {
429  const RTLIB::Libcall Op;
430  const char *const Name;
431  const CallingConv::ID CC;
432  const ISD::CondCode Cond;
433  } MemOpsLibraryCalls[] = {
434  // Memory operations
435  // RTABI chapter 4.3.4
436  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
437  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
438  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
439  };
440 
441  for (const auto &LC : MemOpsLibraryCalls) {
442  setLibcallName(LC.Op, LC.Name);
443  setLibcallCallingConv(LC.Op, LC.CC);
444  if (LC.Cond != ISD::SETCC_INVALID)
445  setCmpLibcallCC(LC.Op, LC.Cond);
446  }
447  }
448  }
449 
450  if (Subtarget->isTargetWindows()) {
451  static const struct {
452  const RTLIB::Libcall Op;
453  const char * const Name;
454  const CallingConv::ID CC;
455  } LibraryCalls[] = {
456  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
457  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
458  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
459  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
460  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
461  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
462  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
463  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
464  };
465 
466  for (const auto &LC : LibraryCalls) {
467  setLibcallName(LC.Op, LC.Name);
468  setLibcallCallingConv(LC.Op, LC.CC);
469  }
470  }
471 
472  // Use divmod compiler-rt calls for iOS 5.0 and later.
473  if (Subtarget->isTargetMachO() &&
474  !(Subtarget->isTargetIOS() &&
475  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
476  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
477  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
478  }
479 
480  // The half <-> float conversion functions are always soft-float on
481  // non-WatchOS platforms, but are needed for some targets which use a
482  // hard-float calling convention by default.
483  if (!Subtarget->isTargetWatchABI()) {
484  if (Subtarget->isAAPCS_ABI()) {
485  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
486  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
487  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
488  } else {
489  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
490  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
491  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
492  }
493  }
494 
495  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
496  // a __gnu_ prefix (which is the default).
497  if (Subtarget->isTargetAEABI()) {
498  static const struct {
499  const RTLIB::Libcall Op;
500  const char * const Name;
501  const CallingConv::ID CC;
502  } LibraryCalls[] = {
503  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
504  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
505  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
506  };
507 
508  for (const auto &LC : LibraryCalls) {
509  setLibcallName(LC.Op, LC.Name);
510  setLibcallCallingConv(LC.Op, LC.CC);
511  }
512  }
513 
514  if (Subtarget->isThumb1Only())
515  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
516  else
517  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
518 
519  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
520  !Subtarget->isThumb1Only()) {
521  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
522  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
523  }
524 
525  for (MVT VT : MVT::vector_valuetypes()) {
526  for (MVT InnerVT : MVT::vector_valuetypes()) {
527  setTruncStoreAction(VT, InnerVT, Expand);
528  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
529  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
530  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
531  }
532 
537 
539  }
540 
543 
546 
547  if (Subtarget->hasNEON()) {
548  addDRTypeForNEON(MVT::v2f32);
549  addDRTypeForNEON(MVT::v8i8);
550  addDRTypeForNEON(MVT::v4i16);
551  addDRTypeForNEON(MVT::v2i32);
552  addDRTypeForNEON(MVT::v1i64);
553 
554  addQRTypeForNEON(MVT::v4f32);
555  addQRTypeForNEON(MVT::v2f64);
556  addQRTypeForNEON(MVT::v16i8);
557  addQRTypeForNEON(MVT::v8i16);
558  addQRTypeForNEON(MVT::v4i32);
559  addQRTypeForNEON(MVT::v2i64);
560 
561  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
562  // neither Neon nor VFP support any arithmetic operations on it.
563  // The same goes for v4f32. But keep in mind that vadd, vsub, vmul are natively
564  // supported for v4f32.
568  // FIXME: Code duplication: FDIV and FREM are expanded always, see
569  // ARMTargetLowering::addTypeForNEON method for details.
572  // FIXME: Create unittest.
573  // In other words, handle the case when "copysign" appears in a DAG with
574  // vector operands.
576  // FIXME: Code duplication: SETCC has custom operation action, see
577  // ARMTargetLowering::addTypeForNEON method for details.
579  // FIXME: Create unittest for FNEG and for FABS.
591  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
598 
613 
614  // Mark v2f32 intrinsics.
629 
630  // Neon does not support some operations on v1i64 and v2i64 types.
632  // Custom handling for some quad-vector types to detect VMULL.
636  // Custom handling for some vector types to avoid expensive expansions
641  // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
642  // a destination type that is wider than the source, nor does it have
643  // an FP_TO_[SU]INT instruction with a narrower destination than
644  // source.
649 
652 
653  // NEON does not have single instruction CTPOP for vectors with element
654  // types wider than 8-bits. However, custom lowering can leverage the
655  // v8i8/v16i8 vcnt instruction.
662 
665 
666  // NEON does not have single instruction CTTZ for vectors.
671 
676 
681 
686 
687  // NEON only has FMA instructions as of VFP4.
688  if (!Subtarget->hasVFP4()) {
691  }
692 
710 
711  // It is legal to extload from v4i8 to v4i16 or v4i32.
713  MVT::v2i32}) {
714  for (MVT VT : MVT::integer_vector_valuetypes()) {
718  }
719  }
720  }
721 
722  if (Subtarget->isFPOnlySP()) {
723  // When targeting a floating-point unit with only single-precision
724  // operations, f64 is legal for the few double-precision instructions which
725  // are present. However, no double-precision operations other than moves,
726  // loads and stores are provided by the hardware.
759  }
760 
762 
763  // ARM does not have floating-point extending loads.
764  for (MVT VT : MVT::fp_valuetypes()) {
767  }
768 
769  // ... or truncating stores
773 
774  // ARM does not have i1 sign extending load.
775  for (MVT VT : MVT::integer_valuetypes())
777 
778  // ARM supports all 4 flavors of integer indexed load / store.
779  if (!Subtarget->isThumb1Only()) {
780  for (unsigned im = (unsigned)ISD::PRE_INC;
790  }
791  } else {
792  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
795  }
796 
801 
802  // i64 operation support.
805  if (Subtarget->isThumb1Only()) {
808  }
809  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
810  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
812 
819 
824 
825  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
827 
828  // ARM does not have ROTL.
830  for (MVT VT : MVT::vector_valuetypes()) {
833  }
836  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
838 
839  // @llvm.readcyclecounter requires the Performance Monitors extension.
840  // Default to the 0 expansion on unsupported platforms.
841  // FIXME: Technically there are older ARM CPUs that have
842  // implementation-specific ways of obtaining this information.
843  if (Subtarget->hasPerfMon())
845 
846  // Only ARMv6 has BSWAP.
847  if (!Subtarget->hasV6Ops())
849 
850  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
851  : Subtarget->hasDivideInARMMode();
852  if (!hasDivide) {
853  // These are expanded into libcalls if the cpu doesn't have HW divider.
856  }
857 
858  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
861 
864  }
865 
868 
869  // Register based DivRem for AEABI (RTABI 4.2)
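 // The AEABI helpers (__aeabi_idivmod and friends) return the quotient and the
 // remainder together, so DIVREM can be lowered to a single helper call rather
 // than separate division and remainder libcalls.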
870  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
871  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
872  Subtarget->isTargetWindows()) {
875  HasStandaloneRem = false;
876 
877  if (Subtarget->isTargetWindows()) {
878  const struct {
879  const RTLIB::Libcall Op;
880  const char * const Name;
881  const CallingConv::ID CC;
882  } LibraryCalls[] = {
883  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
884  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
885  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
886  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
887 
888  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
889  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
890  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
891  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
892  };
893 
894  for (const auto &LC : LibraryCalls) {
895  setLibcallName(LC.Op, LC.Name);
896  setLibcallCallingConv(LC.Op, LC.CC);
897  }
898  } else {
899  const struct {
900  const RTLIB::Libcall Op;
901  const char * const Name;
902  const CallingConv::ID CC;
903  } LibraryCalls[] = {
904  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
905  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
906  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
907  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
908 
909  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
910  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
911  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
912  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
913  };
914 
915  for (const auto &LC : LibraryCalls) {
916  setLibcallName(LC.Op, LC.Name);
917  setLibcallCallingConv(LC.Op, LC.CC);
918  }
919  }
920 
925  } else {
928  }
929 
930  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
931  for (auto &VT : {MVT::f32, MVT::f64})
933 
938 
940 
941  // Use the default implementation.
948 
949  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
951  else
953 
954  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
955  // the default expansion.
956  InsertFencesForAtomic = false;
957  if (Subtarget->hasAnyDataBarrier() &&
958  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
959  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
960  // to ldrex/strex loops already.
962  if (!Subtarget->isThumb() || !Subtarget->isMClass())
964 
965  // On v8, we have particularly efficient implementations of atomic fences
966  // if they can be combined with nearby atomic loads and stores.
967  if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
968  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
969  InsertFencesForAtomic = true;
970  }
971  } else {
972  // If there's anything we can use as a barrier, go through custom lowering
973  // for ATOMIC_FENCE.
974  // If the target has DMB in Thumb, fences can be inserted.
975  if (Subtarget->hasDataBarrier())
976  InsertFencesForAtomic = true;
977 
979  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
980 
981  // Set them all for expansion, which will force libcalls.
994  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
995  // Unordered/Monotonic case.
996  if (!InsertFencesForAtomic) {
999  }
1000  }
1001 
1003 
1004  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1005  if (!Subtarget->hasV6Ops()) {
1008  }
1010 
1011  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1012  !Subtarget->isThumb1Only()) {
1013  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1014  // iff target supports vfp2.
1017  }
1018 
1019  // We want to custom lower some of our intrinsics.
1024  if (Subtarget->useSjLjEH())
1025  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1026 
1036 
1037  // Thumb-1 cannot currently select ARMISD::SUBE.
1038  if (!Subtarget->isThumb1Only())
1040 
1046 
1047  // We don't support sin/cos/fmod/copysign/pow
1056  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1057  !Subtarget->isThumb1Only()) {
1060  }
1063 
1064  if (!Subtarget->hasVFP4()) {
1067  }
1068 
1069  // Various VFP goodness
1070  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1071  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1072  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1075  }
1076 
1077  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1078  if (!Subtarget->hasFP16()) {
1081  }
1082  }
1083 
1084  // Combine sin / cos into one node or libcall if possible.
1085  if (Subtarget->hasSinCos()) {
1086  setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1087  setLibcallName(RTLIB::SINCOS_F64, "sincos");
1088  if (Subtarget->isTargetWatchABI()) {
1091  }
1092  if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1093  // For iOS, we don't want the normal expansion of a libcall to
1094  // sincos. We want to issue a libcall to __sincos_stret.
1097  }
1098  }
1099 
1100  // FP-ARMv8 implements a lot of rounding-like FP operations.
1101  if (Subtarget->hasFPARMv8()) {
1114 
1115  if (!Subtarget->isFPOnlySP()) {
1124  }
1125  }
1126 
1127  if (Subtarget->hasNEON()) {
1128  // vmin and vmax aren't available in a scalar form, so we use
1129  // a NEON instruction with an undef lane instead.
1136  }
1137 
1138  // We have target-specific dag combine patterns for the following nodes:
1139  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1146 
1147  if (Subtarget->hasV6Ops())
1149 
1151 
1152  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1153  !Subtarget->hasVFP2())
1155  else
1157 
1158  //// temporary - rewrite interface to use type
1159  MaxStoresPerMemset = 8;
1161  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1163  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1165 
1166  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1167  // are at least 4 bytes aligned.
1169 
1170  // Prefer likely predicted branches to selects on out-of-order cores.
1171  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1172 
1173  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1174 }
1175 
1177  return Subtarget->useSoftFloat();
1178 }
1179 
1180 // FIXME: It might make sense to define the representative register class as the
1181 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1182 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1183 // SPR's representative would be DPR_VFP2. This should work well if register
1184 // pressure tracking were modified such that a register use would increment the
1185 // pressure of the register class's representative and all of its super
1186 // classes' representatives transitively. We have not implemented this because
1187 // of the difficulty prior to coalescing of modeling operand register classes
1188 // due to the common occurrence of cross class copies and subregister insertions
1189 // and extractions.
1190 std::pair<const TargetRegisterClass *, uint8_t>
1192  MVT VT) const {
1193  const TargetRegisterClass *RRC = nullptr;
1194  uint8_t Cost = 1;
1195  switch (VT.SimpleTy) {
1196  default:
1198  // Use DPR as representative register class for all floating point
1199  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1200  // the cost is 1 for both f32 and f64.
1201  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1202  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1203  RRC = &ARM::DPRRegClass;
1204  // When NEON is used for SP, only half of the register file is available
1205  // because operations that define both SP and DP results will be constrained
1206  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1207  // coalescing by double-counting the SP regs. See the FIXME above.
1208  if (Subtarget->useNEONForSinglePrecisionFP())
1209  Cost = 2;
1210  break;
1211  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1212  case MVT::v4f32: case MVT::v2f64:
1213  RRC = &ARM::DPRRegClass;
1214  Cost = 2;
1215  break;
1216  case MVT::v4i64:
1217  RRC = &ARM::DPRRegClass;
1218  Cost = 4;
1219  break;
1220  case MVT::v8i64:
1221  RRC = &ARM::DPRRegClass;
1222  Cost = 8;
1223  break;
1224  }
1225  return std::make_pair(RRC, Cost);
1226 }
1227 
1228 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1229  switch ((ARMISD::NodeType)Opcode) {
1230  case ARMISD::FIRST_NUMBER: break;
1231  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1232  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1233  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1234  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1235  case ARMISD::CALL: return "ARMISD::CALL";
1236  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1237  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1238  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1239  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1240  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1241  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1242  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1243  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1244  case ARMISD::CMP: return "ARMISD::CMP";
1245  case ARMISD::CMN: return "ARMISD::CMN";
1246  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1247  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1248  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1249  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1250  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1251 
1252  case ARMISD::CMOV: return "ARMISD::CMOV";
1253 
1254  case ARMISD::SSAT: return "ARMISD::SSAT";
1255 
1256  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1257  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1258  case ARMISD::RRX: return "ARMISD::RRX";
1259 
1260  case ARMISD::ADDC: return "ARMISD::ADDC";
1261  case ARMISD::ADDE: return "ARMISD::ADDE";
1262  case ARMISD::SUBC: return "ARMISD::SUBC";
1263  case ARMISD::SUBE: return "ARMISD::SUBE";
1264 
1265  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1266  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1267 
1268  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1269  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1270  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1271 
1272  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1273 
1274  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1275 
1276  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1277 
1278  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1279 
1280  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1281 
1282  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1283  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1284 
1285  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1286  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1287  case ARMISD::VCGE: return "ARMISD::VCGE";
1288  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1289  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1290  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1291  case ARMISD::VCGT: return "ARMISD::VCGT";
1292  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1293  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1294  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1295  case ARMISD::VTST: return "ARMISD::VTST";
1296 
1297  case ARMISD::VSHL: return "ARMISD::VSHL";
1298  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1299  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1300  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1301  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1302  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1303  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1304  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1305  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1306  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1307  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1308  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1309  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1310  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1311  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1312  case ARMISD::VSLI: return "ARMISD::VSLI";
1313  case ARMISD::VSRI: return "ARMISD::VSRI";
1314  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1315  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1316  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1317  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1318  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1319  case ARMISD::VDUP: return "ARMISD::VDUP";
1320  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1321  case ARMISD::VEXT: return "ARMISD::VEXT";
1322  case ARMISD::VREV64: return "ARMISD::VREV64";
1323  case ARMISD::VREV32: return "ARMISD::VREV32";
1324  case ARMISD::VREV16: return "ARMISD::VREV16";
1325  case ARMISD::VZIP: return "ARMISD::VZIP";
1326  case ARMISD::VUZP: return "ARMISD::VUZP";
1327  case ARMISD::VTRN: return "ARMISD::VTRN";
1328  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1329  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1330  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1331  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1332  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1333  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1334  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1335  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1336  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1337  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1338  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1339  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1340  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1341  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1342  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1343  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1344  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1345  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1346  case ARMISD::BFI: return "ARMISD::BFI";
1347  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1348  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1349  case ARMISD::VBSL: return "ARMISD::VBSL";
1350  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1351  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1352  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1353  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1354  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1355  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1356  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1357  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1358  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1359  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1360  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1361  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1362  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1363  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1364  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1365  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1366  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1367  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1368  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1369  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1370  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1371  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1372  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1373  }
1374  return nullptr;
1375 }
1376 
1378  EVT VT) const {
1379  if (!VT.isVector())
1380  return getPointerTy(DL);
1382 }
1383 
1384 /// getRegClassFor - Return the register class that should be used for the
1385 /// specified value type.
1387  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1388  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1389  // load / store 4 to 8 consecutive D registers.
1390  if (Subtarget->hasNEON()) {
1391  if (VT == MVT::v4i64)
1392  return &ARM::QQPRRegClass;
1393  if (VT == MVT::v8i64)
1394  return &ARM::QQQQPRRegClass;
1395  }
1396  return TargetLowering::getRegClassFor(VT);
1397 }
1398 
1399 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1400 // source/dest is aligned and the copy size is large enough. We therefore want
1401 // to align such objects passed to memory intrinsics.
1403  unsigned &PrefAlign) const {
1404  if (!isa<MemIntrinsic>(CI))
1405  return false;
1406  MinSize = 8;
1407  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1408  // cycle faster than 4-byte aligned LDM.
1409  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1410  return true;
1411 }
1412 
1413 // Create a fast isel object.
1414 FastISel *
1416  const TargetLibraryInfo *libInfo) const {
1417  return ARM::createFastISel(funcInfo, libInfo);
1418 }
1419 
1421  unsigned NumVals = N->getNumValues();
1422  if (!NumVals)
1423  return Sched::RegPressure;
1424 
1425  for (unsigned i = 0; i != NumVals; ++i) {
1426  EVT VT = N->getValueType(i);
1427  if (VT == MVT::Glue || VT == MVT::Other)
1428  continue;
1429  if (VT.isFloatingPoint() || VT.isVector())
1430  return Sched::ILP;
1431  }
1432 
1433  if (!N->isMachineOpcode())
1434  return Sched::RegPressure;
1435 
1436  // Loads are scheduled for latency even if the instruction itinerary
1437  // is not available.
1438  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1439  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1440 
1441  if (MCID.getNumDefs() == 0)
1442  return Sched::RegPressure;
1443  if (!Itins->isEmpty() &&
1444  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1445  return Sched::ILP;
1446 
1447  return Sched::RegPressure;
1448 }
1449 
1450 //===----------------------------------------------------------------------===//
1451 // Lowering Code
1452 //===----------------------------------------------------------------------===//
1453 
1454 static bool isSRL16(const SDValue &Op) {
1455  if (Op.getOpcode() != ISD::SRL)
1456  return false;
1457  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1458  return Const->getZExtValue() == 16;
1459  return false;
1460 }
1461 
1462 static bool isSRA16(const SDValue &Op) {
1463  if (Op.getOpcode() != ISD::SRA)
1464  return false;
1465  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1466  return Const->getZExtValue() == 16;
1467  return false;
1468 }
1469 
1470 static bool isSHL16(const SDValue &Op) {
1471  if (Op.getOpcode() != ISD::SHL)
1472  return false;
1473  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1474  return Const->getZExtValue() == 16;
1475  return false;
1476 }
1477 
1478 // Check for a signed 16-bit value. We special-case SRA because it makes it
1479 // simpler when also looking for SRAs that aren't sign extending a
1480 // smaller value. Without the check, we'd need to take extra care with
1481 // checking order for some operations.
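 // A value qualifies either as (sra (shl x, 16), 16), or when the DAG can
 // prove that the top 17 bits are sign bits, i.e. the value already fits in
 // a signed 16-bit quantity.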
1482 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1483  if (isSRA16(Op))
1484  return isSHL16(Op.getOperand(0));
1485  return DAG.ComputeNumSignBits(Op) == 17;
1486 }
1487 
1488 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1490  switch (CC) {
1491  default: llvm_unreachable("Unknown condition code!");
1492  case ISD::SETNE: return ARMCC::NE;
1493  case ISD::SETEQ: return ARMCC::EQ;
1494  case ISD::SETGT: return ARMCC::GT;
1495  case ISD::SETGE: return ARMCC::GE;
1496  case ISD::SETLT: return ARMCC::LT;
1497  case ISD::SETLE: return ARMCC::LE;
1498  case ISD::SETUGT: return ARMCC::HI;
1499  case ISD::SETUGE: return ARMCC::HS;
1500  case ISD::SETULT: return ARMCC::LO;
1501  case ISD::SETULE: return ARMCC::LS;
1502  }
1503 }
1504 
1505 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
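 /// Some FP conditions (e.g. SETONE, SETUEQ) have no single ARM equivalent and
 /// are returned as a pair; CondCode2 is left as ARMCC::AL when only one check
 /// is needed.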
1507  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1508  CondCode2 = ARMCC::AL;
1509  InvalidOnQNaN = true;
1510  switch (CC) {
1511  default: llvm_unreachable("Unknown FP condition!");
1512  case ISD::SETEQ:
1513  case ISD::SETOEQ:
1514  CondCode = ARMCC::EQ;
1515  InvalidOnQNaN = false;
1516  break;
1517  case ISD::SETGT:
1518  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1519  case ISD::SETGE:
1520  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1521  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1522  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1523  case ISD::SETONE:
1524  CondCode = ARMCC::MI;
1525  CondCode2 = ARMCC::GT;
1526  InvalidOnQNaN = false;
1527  break;
1528  case ISD::SETO: CondCode = ARMCC::VC; break;
1529  case ISD::SETUO: CondCode = ARMCC::VS; break;
1530  case ISD::SETUEQ:
1531  CondCode = ARMCC::EQ;
1532  CondCode2 = ARMCC::VS;
1533  InvalidOnQNaN = false;
1534  break;
1535  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1536  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1537  case ISD::SETLT:
1538  case ISD::SETULT: CondCode = ARMCC::LT; break;
1539  case ISD::SETLE:
1540  case ISD::SETULE: CondCode = ARMCC::LE; break;
1541  case ISD::SETNE:
1542  case ISD::SETUNE:
1543  CondCode = ARMCC::NE;
1544  InvalidOnQNaN = false;
1545  break;
1546  }
1547 }
1548 
1549 //===----------------------------------------------------------------------===//
1550 // Calling Convention Implementation
1551 //===----------------------------------------------------------------------===//
1552 
1553 #include "ARMGenCallingConv.inc"
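 // Pulls in the TableGen-generated CC_ARM_* / RetCC_ARM_* argument-assignment
 // functions referenced by CCAssignFnForNode below.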
1554 
1555 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1556 /// account presence of floating point hardware and calling convention
1557 /// limitations, such as support for variadic functions.
1559 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1560  bool isVarArg) const {
1561  switch (CC) {
1562  default:
1563  report_fatal_error("Unsupported calling convention");
1565  case CallingConv::ARM_APCS:
1566  case CallingConv::GHC:
1567  return CC;
1571  case CallingConv::Swift:
1573  case CallingConv::C:
1574  if (!Subtarget->isAAPCS_ABI())
1575  return CallingConv::ARM_APCS;
1576  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1578  !isVarArg)
1580  else
1581  return CallingConv::ARM_AAPCS;
1582  case CallingConv::Fast:
1584  if (!Subtarget->isAAPCS_ABI()) {
1585  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1586  return CallingConv::Fast;
1587  return CallingConv::ARM_APCS;
1588  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1590  else
1591  return CallingConv::ARM_AAPCS;
1592  }
1593 }
1594 
1596  bool isVarArg) const {
1597  return CCAssignFnForNode(CC, false, isVarArg);
1598 }
1599 
1601  bool isVarArg) const {
1602  return CCAssignFnForNode(CC, true, isVarArg);
1603 }
1604 
1605 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1606 /// CallingConvention.
1607 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1608  bool Return,
1609  bool isVarArg) const {
1610  switch (getEffectiveCallingConv(CC, isVarArg)) {
1611  default:
1612  report_fatal_error("Unsupported calling convention");
1613  case CallingConv::ARM_APCS:
1614  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1616  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1618  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1619  case CallingConv::Fast:
1620  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1621  case CallingConv::GHC:
1622  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1624  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1625  }
1626 }
1627 
1628 /// LowerCallResult - Lower the result values of a call into the
1629 /// appropriate copies out of appropriate physical registers.
1630 SDValue ARMTargetLowering::LowerCallResult(
1631  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1632  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1633  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1634  SDValue ThisVal) const {
1635  // Assign locations to each value returned by this call.
1637  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1638  *DAG.getContext());
1639  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1640 
1641  // Copy all of the result registers out of their specified physreg.
1642  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1643  CCValAssign VA = RVLocs[i];
1644 
1645  // Pass 'this' value directly from the argument to return value, to avoid
1646  // reg unit interference
1647  if (i == 0 && isThisReturn) {
1648  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1649  "unexpected return calling convention register assignment");
1650  InVals.push_back(ThisVal);
1651  continue;
1652  }
1653 
1654  SDValue Val;
1655  if (VA.needsCustom()) {
1656  // Handle f64 or half of a v2f64.
1657  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1658  InFlag);
1659  Chain = Lo.getValue(1);
1660  InFlag = Lo.getValue(2);
1661  VA = RVLocs[++i]; // skip ahead to next loc
1662  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1663  InFlag);
1664  Chain = Hi.getValue(1);
1665  InFlag = Hi.getValue(2);
1666  if (!Subtarget->isLittle())
1667  std::swap (Lo, Hi);
1668  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1669 
1670  if (VA.getLocVT() == MVT::v2f64) {
1671  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1672  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1673  DAG.getConstant(0, dl, MVT::i32));
1674 
1675  VA = RVLocs[++i]; // skip ahead to next loc
1676  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1677  Chain = Lo.getValue(1);
1678  InFlag = Lo.getValue(2);
1679  VA = RVLocs[++i]; // skip ahead to next loc
1680  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1681  Chain = Hi.getValue(1);
1682  InFlag = Hi.getValue(2);
1683  if (!Subtarget->isLittle())
1684  std::swap (Lo, Hi);
1685  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1686  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1687  DAG.getConstant(1, dl, MVT::i32));
1688  }
1689  } else {
1690  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1691  InFlag);
1692  Chain = Val.getValue(1);
1693  InFlag = Val.getValue(2);
1694  }
1695 
1696  switch (VA.getLocInfo()) {
1697  default: llvm_unreachable("Unknown loc info!");
1698  case CCValAssign::Full: break;
1699  case CCValAssign::BCvt:
1700  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1701  break;
1702  }
1703 
1704  InVals.push_back(Val);
1705  }
1706 
1707  return Chain;
1708 }
1709 
1710 /// LowerMemOpCallTo - Store the argument to the stack.
1711 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1712  SDValue Arg, const SDLoc &dl,
1713  SelectionDAG &DAG,
1714  const CCValAssign &VA,
1715  ISD::ArgFlagsTy Flags) const {
1716  unsigned LocMemOffset = VA.getLocMemOffset();
1717  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1718  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1719  StackPtr, PtrOff);
1720  return DAG.getStore(
1721  Chain, dl, Arg, PtrOff,
1722  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1723 }
1724 
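 /// PassF64ArgInRegs - Split an f64 value (or one half of a v2f64) into two
 /// i32 halves with VMOVRRD and pass them in consecutive GPRs; if the second
 /// half has no register left, it is stored to the outgoing argument area
 /// instead.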
1725 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1726  SDValue Chain, SDValue &Arg,
1727  RegsToPassVector &RegsToPass,
1728  CCValAssign &VA, CCValAssign &NextVA,
1729  SDValue &StackPtr,
1730  SmallVectorImpl<SDValue> &MemOpChains,
1731  ISD::ArgFlagsTy Flags) const {
1732  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1733  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1734  unsigned id = Subtarget->isLittle() ? 0 : 1;
1735  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1736 
1737  if (NextVA.isRegLoc())
1738  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1739  else {
1740  assert(NextVA.isMemLoc());
1741  if (!StackPtr.getNode())
1742  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1743  getPointerTy(DAG.getDataLayout()));
1744 
1745  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1746  dl, DAG, NextVA,
1747  Flags));
1748  }
1749 }
1750 
1751 /// LowerCall - Lowering a call into a callseq_start <-
1752 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1753 /// nodes.
1754 SDValue
1755 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1756  SmallVectorImpl<SDValue> &InVals) const {
1757  SelectionDAG &DAG = CLI.DAG;
1758  SDLoc &dl = CLI.DL;
1760  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1762  SDValue Chain = CLI.Chain;
1763  SDValue Callee = CLI.Callee;
1764  bool &isTailCall = CLI.IsTailCall;
1765  CallingConv::ID CallConv = CLI.CallConv;
1766  bool doesNotRet = CLI.DoesNotReturn;
1767  bool isVarArg = CLI.IsVarArg;
1768 
1769  MachineFunction &MF = DAG.getMachineFunction();
1770  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1771  bool isThisReturn = false;
1772  bool isSibCall = false;
1773  auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1774 
1775  // Disable tail calls if they're not supported.
1776  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1777  isTailCall = false;
1778 
1779  if (isTailCall) {
1780  // Check if it's really possible to do a tail call.
1781  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1782  isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1783  Outs, OutVals, Ins, DAG);
1784  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1785  report_fatal_error("failed to perform tail call elimination on a call "
1786  "site marked musttail");
1787  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1788  // detected sibcalls.
1789  if (isTailCall) {
1790  ++NumTailCalls;
1791  isSibCall = true;
1792  }
1793  }
1794 
1795  // Analyze operands of the call, assigning locations to each operand.
1797  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1798  *DAG.getContext());
1799  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1800 
1801  // Get a count of how many bytes are to be pushed on the stack.
1802  unsigned NumBytes = CCInfo.getNextStackOffset();
1803 
1804  // For tail calls, memory operands are available in our caller's stack.
1805  if (isSibCall)
1806  NumBytes = 0;
1807 
1808  // Adjust the stack pointer for the new arguments...
1809  // These operations are automatically eliminated by the prolog/epilog pass
1810  if (!isSibCall)
1811  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1812 
1813  SDValue StackPtr =
1814  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1815 
1816  RegsToPassVector RegsToPass;
1817  SmallVector<SDValue, 8> MemOpChains;
1818 
1819  // Walk the register/memloc assignments, inserting copies/loads. In the case
1820  // of tail call optimization, arguments are handled later.
1821  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1822  i != e;
1823  ++i, ++realArgIdx) {
1824  CCValAssign &VA = ArgLocs[i];
1825  SDValue Arg = OutVals[realArgIdx];
1826  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1827  bool isByVal = Flags.isByVal();
1828 
1829  // Promote the value if needed.
1830  switch (VA.getLocInfo()) {
1831  default: llvm_unreachable("Unknown loc info!");
1832  case CCValAssign::Full: break;
1833  case CCValAssign::SExt:
1834  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1835  break;
1836  case CCValAssign::ZExt:
1837  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1838  break;
1839  case CCValAssign::AExt:
1840  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1841  break;
1842  case CCValAssign::BCvt:
1843  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1844  break;
1845  }
1846 
1847  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1848  if (VA.needsCustom()) {
1849  if (VA.getLocVT() == MVT::v2f64) {
1850  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1851  DAG.getConstant(0, dl, MVT::i32));
1852  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1853  DAG.getConstant(1, dl, MVT::i32));
1854 
1855  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1856  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1857 
1858  VA = ArgLocs[++i]; // skip ahead to next loc
1859  if (VA.isRegLoc()) {
1860  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1861  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1862  } else {
1863  assert(VA.isMemLoc());
1864 
1865  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1866  dl, DAG, VA, Flags));
1867  }
1868  } else {
1869  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1870  StackPtr, MemOpChains, Flags);
1871  }
1872  } else if (VA.isRegLoc()) {
1873  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1874  Outs[0].VT == MVT::i32) {
1875  assert(VA.getLocVT() == MVT::i32 &&
1876  "unexpected calling convention register assignment");
1877  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1878  "unexpected use of 'returned'");
1879  isThisReturn = true;
1880  }
1881  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1882  } else if (isByVal) {
1883  assert(VA.isMemLoc());
1884  unsigned offset = 0;
1885 
1886  // True if this byval aggregate will be split between registers
1887  // and memory.
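      // For example (illustrative): with r2 and r3 still free, a 24-byte byval
      // argument is loaded 4 bytes at a time into r2 and r3 by the loop below,
      // and the remaining 16 bytes are copied to the outgoing stack area by the
      // ARMISD::COPY_STRUCT_BYVAL node emitted further down.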
1888  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1889  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1890 
1891  if (CurByValIdx < ByValArgsCount) {
1892 
1893  unsigned RegBegin, RegEnd;
1894  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1895 
1896  EVT PtrVT =
1897  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1898  unsigned int i, j;
1899  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1900  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1901  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1902  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1903  MachinePointerInfo(),
1904  DAG.InferPtrAlignment(AddArg));
1905  MemOpChains.push_back(Load.getValue(1));
1906  RegsToPass.push_back(std::make_pair(j, Load));
1907  }
1908 
1909  // If the parameter's size exceeds the register area, the "offset" value
1910  // helps us calculate the stack slot for the remaining part properly.
1911  offset = RegEnd - RegBegin;
1912 
1913  CCInfo.nextInRegsParam();
1914  }
1915 
1916  if (Flags.getByValSize() > 4*offset) {
1917  auto PtrVT = getPointerTy(DAG.getDataLayout());
1918  unsigned LocMemOffset = VA.getLocMemOffset();
1919  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1920  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1921  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1922  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1923  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1924  MVT::i32);
1925  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1926  MVT::i32);
1927 
1928  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1929  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1930  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1931  Ops));
1932  }
1933  } else if (!isSibCall) {
1934  assert(VA.isMemLoc());
1935 
1936  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1937  dl, DAG, VA, Flags));
1938  }
1939  }
1940 
1941  if (!MemOpChains.empty())
1942  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1943 
1944  // Build a sequence of copy-to-reg nodes chained together with token chain
1945  // and flag operands which copy the outgoing args into the appropriate regs.
1946  SDValue InFlag;
1947  // Tail call byval lowering might overwrite argument registers so in case of
1948  // tail call optimization the copies to registers are lowered later.
1949  if (!isTailCall)
1950  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1951  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1952  RegsToPass[i].second, InFlag);
1953  InFlag = Chain.getValue(1);
1954  }
1955 
1956  // For tail calls lower the arguments to the 'real' stack slot.
1957  if (isTailCall) {
1958  // Force all the incoming stack arguments to be loaded from the stack
1959  // before any new outgoing arguments are stored to the stack, because the
1960  // outgoing stack slots may alias the incoming argument stack slots, and
1961  // the alias isn't otherwise explicit. This is slightly more conservative
1962  // than necessary, because it means that each store effectively depends
1963  // on every argument instead of just those arguments it would clobber.
1964 
1965  // Do not flag preceding copytoreg stuff together with the following stuff.
1966  InFlag = SDValue();
1967  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1968  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1969  RegsToPass[i].second, InFlag);
1970  InFlag = Chain.getValue(1);
1971  }
1972  InFlag = SDValue();
1973  }
1974 
1975  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1976  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1977  // node so that legalize doesn't hack it.
1978  bool isDirect = false;
1979 
1980  const TargetMachine &TM = getTargetMachine();
1981  const Module *Mod = MF.getFunction()->getParent();
1982  const GlobalValue *GV = nullptr;
1983  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1984  GV = G->getGlobal();
1985  bool isStub =
1986  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1987 
1988  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1989  bool isLocalARMFunc = false;
1990  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1991  auto PtrVt = getPointerTy(DAG.getDataLayout());
1992 
1993  if (Subtarget->genLongCalls()) {
1994  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
1995  "long-calls codegen is not position independent!");
1996  // Handle a global address or an external symbol. If it's not one of
1997  // those, the target's already in a register, so we don't need to do
1998  // anything extra.
1999  if (isa<GlobalAddressSDNode>(Callee)) {
2000  // Create a constant pool entry for the callee address
2001  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2002  ARMConstantPoolValue *CPV =
2003  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2004 
2005  // Get the address of the callee into a register
2006  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2007  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2008  Callee = DAG.getLoad(
2009  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2010  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2011  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2012  const char *Sym = S->getSymbol();
2013 
2014  // Create a constant pool entry for the callee address
2015  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2016  ARMConstantPoolValue *CPV =
2017  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2018  ARMPCLabelIndex, 0);
2019  // Get the address of the callee into a register
2020  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2021  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2022  Callee = DAG.getLoad(
2023  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2024  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2025  }
2026  } else if (isa<GlobalAddressSDNode>(Callee)) {
2027  // If we're optimizing for minimum size and the function is called three or
2028  // more times in this block, we can improve codesize by calling indirectly
2029  // as BLXr has a 16-bit encoding.
2030  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2031  auto *BB = CLI.CS.getParent();
2032  bool PreferIndirect =
2033  Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
2034  count_if(GV->users(), [&BB](const User *U) {
2035  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2036  }) > 2;
2037 
2038  if (!PreferIndirect) {
2039  isDirect = true;
2040  bool isDef = GV->isStrongDefinitionForLinker();
2041 
2042  // ARM call to a local ARM function is predicable.
2043  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2044  // tBX takes a register source operand.
2045  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2046  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2047  Callee = DAG.getNode(
2048  ARMISD::WrapperPIC, dl, PtrVt,
2049  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2050  Callee = DAG.getLoad(
2051  PtrVt, dl, DAG.getEntryNode(), Callee,
2052  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2053  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2054  MachineMemOperand::MOInvariant);
2055  } else if (Subtarget->isTargetCOFF()) {
2056  assert(Subtarget->isTargetWindows() &&
2057  "Windows is the only supported COFF target");
2058  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2059  ? ARMII::MO_DLLIMPORT
2060  : ARMII::MO_NO_FLAG;
2061  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2062  TargetFlags);
2063  if (GV->hasDLLImportStorageClass())
2064  Callee =
2065  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2066  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2067  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2068  } else {
2069  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2070  }
2071  }
2072  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2073  isDirect = true;
2074  // tBX takes a register source operand.
2075  const char *Sym = S->getSymbol();
2076  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2077  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2078  ARMConstantPoolValue *CPV =
2079  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2080  ARMPCLabelIndex, 4);
2081  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2082  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2083  Callee = DAG.getLoad(
2084  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2085  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2086  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2087  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2088  } else {
2089  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2090  }
2091  }
2092 
2093  // FIXME: handle tail calls differently.
2094  unsigned CallOpc;
2095  if (Subtarget->isThumb()) {
2096  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2097  CallOpc = ARMISD::CALL_NOLINK;
2098  else
2099  CallOpc = ARMISD::CALL;
2100  } else {
2101  if (!isDirect && !Subtarget->hasV5TOps())
2102  CallOpc = ARMISD::CALL_NOLINK;
2103  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2104  // Emit regular call when code size is the priority
2105  !MF.getFunction()->optForMinSize())
2106  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2107  CallOpc = ARMISD::CALL_NOLINK;
2108  else
2109  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2110  }
2111 
2112  std::vector<SDValue> Ops;
2113  Ops.push_back(Chain);
2114  Ops.push_back(Callee);
2115 
2116  // Add argument registers to the end of the list so that they are known live
2117  // into the call.
2118  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2119  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2120  RegsToPass[i].second.getValueType()));
2121 
2122  // Add a register mask operand representing the call-preserved registers.
2123  if (!isTailCall) {
2124  const uint32_t *Mask;
2125  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2126  if (isThisReturn) {
2127  // For 'this' returns, use the R0-preserving mask if applicable
2128  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2129  if (!Mask) {
2130  // Set isThisReturn to false if the calling convention is not one that
2131  // allows 'returned' to be modeled in this way, so LowerCallResult does
2132  // not try to pass 'this' straight through
2133  isThisReturn = false;
2134  Mask = ARI->getCallPreservedMask(MF, CallConv);
2135  }
2136  } else
2137  Mask = ARI->getCallPreservedMask(MF, CallConv);
2138 
2139  assert(Mask && "Missing call preserved mask for calling convention");
2140  Ops.push_back(DAG.getRegisterMask(Mask));
2141  }
2142 
2143  if (InFlag.getNode())
2144  Ops.push_back(InFlag);
2145 
2146  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2147  if (isTailCall) {
2148  MF.getFrameInfo().setHasTailCall();
2149  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2150  }
2151 
2152  // Returns a chain and a flag for retval copy to use.
2153  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2154  InFlag = Chain.getValue(1);
2155 
2156  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2157  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2158  if (!Ins.empty())
2159  InFlag = Chain.getValue(1);
2160 
2161  // Handle result values, copying them out of physregs into vregs that we
2162  // return.
2163  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2164  InVals, isThisReturn,
2165  isThisReturn ? OutVals[0] : SDValue());
2166 }
2167 
2168 /// HandleByVal - Every parameter *after* a byval parameter is passed
2169 /// on the stack. Remember the next parameter register to allocate,
2170 /// and then confiscate the rest of the parameter registers to ensure
2171 /// this.
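/// For example (illustrative): a 20-byte byval with 8-byte alignment, arriving
/// when only r0 is already taken, first skips r1 so the register range starts
/// on an even register boundary, records [r2, r4) as its register range, and
/// leaves Size == 12 bytes to be passed on the stack.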
2172 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2173  unsigned Align) const {
2174  // Byval (as with any stack) slots are always at least 4 byte aligned.
2175  Align = std::max(Align, 4U);
2176 
2177  unsigned Reg = State->AllocateReg(GPRArgRegs);
2178  if (!Reg)
2179  return;
2180 
2181  unsigned AlignInRegs = Align / 4;
2182  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2183  for (unsigned i = 0; i < Waste; ++i)
2184  Reg = State->AllocateReg(GPRArgRegs);
2185 
2186  if (!Reg)
2187  return;
2188 
2189  unsigned Excess = 4 * (ARM::R4 - Reg);
2190 
2191  // Special case when NSAA != SP and the parameter size is greater than the
2192  // size of all remaining GPR regs. In that case we can't split the parameter;
2193  // we must send it to the stack. We also must set NCRN to R4, so all
2194  // remaining registers are wasted.
2195  const unsigned NSAAOffset = State->getNextStackOffset();
2196  if (NSAAOffset != 0 && Size > Excess) {
2197  while (State->AllocateReg(GPRArgRegs))
2198  ;
2199  return;
2200  }
2201 
2202  // The first register for the byval parameter is the first register that
2203  // wasn't allocated before this call, so it is "Reg".
2204  // If the parameter is small enough to be saved in the range [Reg, r4), then
2205  // the end (one past the last) register is Reg + param-size-in-regs;
2206  // otherwise the parameter is split between registers and the stack, and
2207  // the end register is r4 in that case.
2208  unsigned ByValRegBegin = Reg;
2209  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2210  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2211  // Note that the first register was already allocated at the beginning of
2212  // this function; here we allocate the remaining registers we need.
2213  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2214  State->AllocateReg(GPRArgRegs);
2215  // A byval parameter that is split between registers and memory needs its
2216  // size truncated here.
2217  // In the case where the entire structure fits in registers, we set the
2218  // size in memory to zero.
2219  Size = std::max<int>(Size - Excess, 0);
2220 }
2221 
2222 /// MatchingStackOffset - Return true if the given stack call argument is
2223 /// already available in the same position (relatively) of the caller's
2224 /// incoming argument stack.
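/// If so, a sibling call can leave the argument where it is: storing it again
/// would only rewrite the caller's fixed stack slot with the value it already
/// contains.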
2225 static
2226 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2227  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2228  const TargetInstrInfo *TII) {
2229  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2230  int FI = std::numeric_limits<int>::max();
2231  if (Arg.getOpcode() == ISD::CopyFromReg) {
2232  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2233  if (!TargetRegisterInfo::isVirtualRegister(VR))
2234  return false;
2235  MachineInstr *Def = MRI->getVRegDef(VR);
2236  if (!Def)
2237  return false;
2238  if (!Flags.isByVal()) {
2239  if (!TII->isLoadFromStackSlot(*Def, FI))
2240  return false;
2241  } else {
2242  return false;
2243  }
2244  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2245  if (Flags.isByVal())
2246  // ByVal argument is passed in as a pointer but it's now being
2247  // dereferenced. e.g.
2248  // define @foo(%struct.X* %A) {
2249  // tail call @bar(%struct.X* byval %A)
2250  // }
2251  return false;
2252  SDValue Ptr = Ld->getBasePtr();
2253  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2254  if (!FINode)
2255  return false;
2256  FI = FINode->getIndex();
2257  } else
2258  return false;
2259 
2260  assert(FI != std::numeric_limits<int>::max());
2261  if (!MFI.isFixedObjectIndex(FI))
2262  return false;
2263  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2264 }
2265 
2266 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2267 /// for tail call optimization. Targets which want to do tail call
2268 /// optimization should implement this function.
2269 bool
2270 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2271  CallingConv::ID CalleeCC,
2272  bool isVarArg,
2273  bool isCalleeStructRet,
2274  bool isCallerStructRet,
2275  const SmallVectorImpl<ISD::OutputArg> &Outs,
2276  const SmallVectorImpl<SDValue> &OutVals,
2277  const SmallVectorImpl<ISD::InputArg> &Ins,
2278  SelectionDAG& DAG) const {
2279  MachineFunction &MF = DAG.getMachineFunction();
2280  const Function *CallerF = MF.getFunction();
2281  CallingConv::ID CallerCC = CallerF->getCallingConv();
2282 
2283  assert(Subtarget->supportsTailCall());
2284 
2285  // Look for obvious safe cases to perform tail call optimization that do not
2286  // require ABI changes. This is what gcc calls sibcall.
2287 
2288  // Exception-handling functions need a special set of instructions to indicate
2289  // a return to the hardware. Tail-calling another function would probably
2290  // break this.
2291  if (CallerF->hasFnAttribute("interrupt"))
2292  return false;
2293 
2294  // Also avoid sibcall optimization if either caller or callee uses struct
2295  // return semantics.
2296  if (isCalleeStructRet || isCallerStructRet)
2297  return false;
2298 
2299  // Externally-defined functions with weak linkage should not be
2300  // tail-called on ARM when the OS does not support dynamic
2301  // pre-emption of symbols, as the AAELF spec requires normal calls
2302  // to undefined weak functions to be replaced with a NOP or jump to the
2303  // next instruction. The behaviour of branch instructions in this
2304  // situation (as used for tail calls) is implementation-defined, so we
2305  // cannot rely on the linker replacing the tail call with a return.
2306  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2307  const GlobalValue *GV = G->getGlobal();
2308  const Triple &TT = getTargetMachine().getTargetTriple();
2309  if (GV->hasExternalWeakLinkage() &&
2310  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2311  return false;
2312  }
2313 
2314  // Check that the call results are passed in the same way.
2315  LLVMContext &C = *DAG.getContext();
2316  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2317  CCAssignFnForReturn(CalleeCC, isVarArg),
2318  CCAssignFnForReturn(CallerCC, isVarArg)))
2319  return false;
2320  // The callee has to preserve all registers the caller needs to preserve.
2321  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2322  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2323  if (CalleeCC != CallerCC) {
2324  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2325  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2326  return false;
2327  }
2328 
2329  // If Caller's vararg or byval argument has been split between registers and
2330  // stack, do not perform tail call, since part of the argument is in caller's
2331  // local frame.
2332  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2333  if (AFI_Caller->getArgRegsSaveSize())
2334  return false;
2335 
2336  // If the callee takes no arguments then go on to check the results of the
2337  // call.
2338  if (!Outs.empty()) {
2339  // Check if stack adjustment is needed. For now, do not do this if any
2340  // argument is passed on the stack.
2341  SmallVector<CCValAssign, 16> ArgLocs;
2342  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2343  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2344  if (CCInfo.getNextStackOffset()) {
2345  // Check if the arguments are already laid out in the right way as
2346  // the caller's fixed stack objects.
2347  MachineFrameInfo &MFI = MF.getFrameInfo();
2348  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2349  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2350  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2351  i != e;
2352  ++i, ++realArgIdx) {
2353  CCValAssign &VA = ArgLocs[i];
2354  EVT RegVT = VA.getLocVT();
2355  SDValue Arg = OutVals[realArgIdx];
2356  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2357  if (VA.getLocInfo() == CCValAssign::Indirect)
2358  return false;
2359  if (VA.needsCustom()) {
2360  // f64 and vector types are split into multiple registers or
2361  // register/stack-slot combinations. The types will not match
2362  // the registers; give up on memory f64 refs until we figure
2363  // out what to do about this.
2364  if (!VA.isRegLoc())
2365  return false;
2366  if (!ArgLocs[++i].isRegLoc())
2367  return false;
2368  if (RegVT == MVT::v2f64) {
2369  if (!ArgLocs[++i].isRegLoc())
2370  return false;
2371  if (!ArgLocs[++i].isRegLoc())
2372  return false;
2373  }
2374  } else if (!VA.isRegLoc()) {
2375  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2376  MFI, MRI, TII))
2377  return false;
2378  }
2379  }
2380  }
2381 
2382  const MachineRegisterInfo &MRI = MF.getRegInfo();
2383  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2384  return false;
2385  }
2386 
2387  return true;
2388 }
2389 
2390 bool
2391 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2392  MachineFunction &MF, bool isVarArg,
2393  const SmallVectorImpl<ISD::OutputArg> &Outs,
2394  LLVMContext &Context) const {
2395  SmallVector<CCValAssign, 16> RVLocs;
2396  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2397  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2398 }
2399 
2400 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2401  const SDLoc &DL, SelectionDAG &DAG) {
2402  const MachineFunction &MF = DAG.getMachineFunction();
2403  const Function *F = MF.getFunction();
2404 
2405  StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2406 
2407  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2408  // version of the "preferred return address". These offsets affect the return
2409  // instruction if this is a return from PL1 without hypervisor extensions.
2410  // IRQ/FIQ: +4 "subs pc, lr, #4"
2411  // SWI: 0 "subs pc, lr, #0"
2412  // ABORT: +4 "subs pc, lr, #4"
2413  // UNDEF: +4/+2 "subs pc, lr, #0"
2414  // UNDEF varies depending on where the exception came from ARM or Thumb
2415  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2416 
2417  int64_t LROffset;
2418  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2419  IntKind == "ABORT")
2420  LROffset = 4;
2421  else if (IntKind == "SWI" || IntKind == "UNDEF")
2422  LROffset = 0;
2423  else
2424  report_fatal_error("Unsupported interrupt attribute. If present, value "
2425  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2426 
2427  RetOps.insert(RetOps.begin() + 1,
2428  DAG.getConstant(LROffset, DL, MVT::i32, false));
2429 
2430  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2431 }
2432 
2433 SDValue
2434 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2435  bool isVarArg,
2436  const SmallVectorImpl<ISD::OutputArg> &Outs,
2437  const SmallVectorImpl<SDValue> &OutVals,
2438  const SDLoc &dl, SelectionDAG &DAG) const {
2439  // CCValAssign - represent the assignment of the return value to a location.
2440  SmallVector<CCValAssign, 16> RVLocs;
2441 
2442  // CCState - Info about the registers and stack slots.
2443  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2444  *DAG.getContext());
2445 
2446  // Analyze outgoing return values.
2447  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2448 
2449  SDValue Flag;
2450  SmallVector<SDValue, 4> RetOps;
2451  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2452  bool isLittleEndian = Subtarget->isLittle();
2453 
2454  MachineFunction &MF = DAG.getMachineFunction();
2455  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2456  AFI->setReturnRegsCount(RVLocs.size());
2457 
2458  // Copy the result values into the output registers.
2459  for (unsigned i = 0, realRVLocIdx = 0;
2460  i != RVLocs.size();
2461  ++i, ++realRVLocIdx) {
2462  CCValAssign &VA = RVLocs[i];
2463  assert(VA.isRegLoc() && "Can only return in registers!");
2464 
2465  SDValue Arg = OutVals[realRVLocIdx];
2466 
2467  switch (VA.getLocInfo()) {
2468  default: llvm_unreachable("Unknown loc info!");
2469  case CCValAssign::Full: break;
2470  case CCValAssign::BCvt:
2471  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2472  break;
2473  }
2474 
2475  if (VA.needsCustom()) {
2476  if (VA.getLocVT() == MVT::v2f64) {
2477  // Extract the first half and return it in two registers.
2478  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2479  DAG.getConstant(0, dl, MVT::i32));
2480  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2481  DAG.getVTList(MVT::i32, MVT::i32), Half);
2482 
2483  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2484  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2485  Flag);
2486  Flag = Chain.getValue(1);
2487  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2488  VA = RVLocs[++i]; // skip ahead to next loc
2489  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2490  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2491  Flag);
2492  Flag = Chain.getValue(1);
2493  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2494  VA = RVLocs[++i]; // skip ahead to next loc
2495 
2496  // Extract the 2nd half and fall through to handle it as an f64 value.
2497  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2498  DAG.getConstant(1, dl, MVT::i32));
2499  }
2500  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2501  // available.
2502  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2503  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2504  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2505  fmrrd.getValue(isLittleEndian ? 0 : 1),
2506  Flag);
2507  Flag = Chain.getValue(1);
2508  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2509  VA = RVLocs[++i]; // skip ahead to next loc
2510  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2511  fmrrd.getValue(isLittleEndian ? 1 : 0),
2512  Flag);
2513  } else
2514  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2515 
2516  // Guarantee that all emitted copies are stuck together (glued), so that
2517  // nothing can be scheduled in between them.
2518  Flag = Chain.getValue(1);
2519  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2520  }
2521  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2522  const MCPhysReg *I =
2523  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2524  if (I) {
2525  for (; *I; ++I) {
2526  if (ARM::GPRRegClass.contains(*I))
2527  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2528  else if (ARM::DPRRegClass.contains(*I))
2529  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2530  else
2531  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2532  }
2533  }
2534 
2535  // Update chain and glue.
2536  RetOps[0] = Chain;
2537  if (Flag.getNode())
2538  RetOps.push_back(Flag);
2539 
2540  // CPUs which aren't M-class use a special sequence to return from
2541  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2542  // though we use "subs pc, lr, #N").
2543  //
2544  // M-class CPUs actually use a normal return sequence with a special
2545  // (hardware-provided) value in LR, so the normal code path works.
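  // (On exception entry an M-class core loads LR with an EXC_RETURN value such
  // as 0xFFFFFFF9, and branching to it triggers the hardware exception return,
  // so no "subs pc, lr, #N" sequence is needed there.)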
2546  if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2547  !Subtarget->isMClass()) {
2548  if (Subtarget->isThumb1Only())
2549  report_fatal_error("interrupt attribute is not supported in Thumb1");
2550  return LowerInterruptReturn(RetOps, dl, DAG);
2551  }
2552 
2553  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2554 }
2555 
2556 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2557  if (N->getNumValues() != 1)
2558  return false;
2559  if (!N->hasNUsesOfValue(1, 0))
2560  return false;
2561 
2562  SDValue TCChain = Chain;
2563  SDNode *Copy = *N->use_begin();
2564  if (Copy->getOpcode() == ISD::CopyToReg) {
2565  // If the copy has a glue operand, we conservatively assume it isn't safe to
2566  // perform a tail call.
2567  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2568  return false;
2569  TCChain = Copy->getOperand(0);
2570  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2571  SDNode *VMov = Copy;
2572  // f64 returned in a pair of GPRs.
2573  SmallPtrSet<SDNode*, 2> Copies;
2574  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2575  UI != UE; ++UI) {
2576  if (UI->getOpcode() != ISD::CopyToReg)
2577  return false;
2578  Copies.insert(*UI);
2579  }
2580  if (Copies.size() > 2)
2581  return false;
2582 
2583  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2584  UI != UE; ++UI) {
2585  SDValue UseChain = UI->getOperand(0);
2586  if (Copies.count(UseChain.getNode()))
2587  // Second CopyToReg
2588  Copy = *UI;
2589  else {
2590  // We are at the top of this chain.
2591  // If the copy has a glue operand, we conservatively assume it
2592  // isn't safe to perform a tail call.
2593  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2594  return false;
2595  // First CopyToReg
2596  TCChain = UseChain;
2597  }
2598  }
2599  } else if (Copy->getOpcode() == ISD::BITCAST) {
2600  // f32 returned in a single GPR.
2601  if (!Copy->hasOneUse())
2602  return false;
2603  Copy = *Copy->use_begin();
2604  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2605  return false;
2606  // If the copy has a glue operand, we conservatively assume it isn't safe to
2607  // perform a tail call.
2608  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2609  return false;
2610  TCChain = Copy->getOperand(0);
2611  } else {
2612  return false;
2613  }
2614 
2615  bool HasRet = false;
2616  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2617  UI != UE; ++UI) {
2618  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2619  UI->getOpcode() != ARMISD::INTRET_FLAG)
2620  return false;
2621  HasRet = true;
2622  }
2623 
2624  if (!HasRet)
2625  return false;
2626 
2627  Chain = TCChain;
2628  return true;
2629 }
2630 
2631 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2632  if (!Subtarget->supportsTailCall())
2633  return false;
2634 
2635  auto Attr =
2636  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2637  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2638  return false;
2639 
2640  return true;
2641 }
2642 
2643 // Writing a 64-bit value requires splitting it into two 32-bit values first,
2644 // and then passing the low and high parts through separately.
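// Below, ISD::EXTRACT_ELEMENT with index 0 selects the low 32 bits and index 1
// the high 32 bits, so the rebuilt WRITE_REGISTER node carries both halves as
// separate i32 operands.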
2645 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2646  SDLoc DL(Op);
2647  SDValue WriteValue = Op->getOperand(2);
2648 
2649  // This function is only supposed to be called for i64 type argument.
2650  assert(WriteValue.getValueType() == MVT::i64
2651  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2652 
2653  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2654  DAG.getConstant(0, DL, MVT::i32));
2655  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2656  DAG.getConstant(1, DL, MVT::i32));
2657  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2658  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2659 }
2660 
2661 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2662 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2663 // one of the above mentioned nodes. It has to be wrapped because otherwise
2664 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2665  // be used to form an addressing mode. These wrapped nodes will be selected
2666 // into MOVi.
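// For instance, lowering a constant-pool reference produces
// (ARMISD::Wrapper (TargetConstantPool <cp#N>)), and it is that wrapped form
// that the ARM patterns match when materializing the address.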
2667 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2668  SelectionDAG &DAG) const {
2669  EVT PtrVT = Op.getValueType();
2670  // FIXME there is no actual debug info here
2671  SDLoc dl(Op);
2672  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2673  SDValue Res;
2674 
2675  // When generating execute-only code Constant Pools must be promoted to the
2676  // global data section. It's a bit ugly that we can't share them across basic
2677  // blocks, but this way we guarantee that execute-only behaves correct with
2678  // position-independent addressing modes.
2679  if (Subtarget->genExecuteOnly()) {
2680  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2681  auto T = const_cast<Type*>(CP->getType());
2682  auto C = const_cast<Constant*>(CP->getConstVal());
2683  auto M = const_cast<Module*>(DAG.getMachineFunction().
2684  getFunction()->getParent());
2685  auto GV = new GlobalVariable(
2686  *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2687  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2688  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2689  Twine(AFI->createPICLabelUId())
2690  );
2691  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2692  dl, PtrVT);
2693  return LowerGlobalAddress(GA, DAG);
2694  }
2695 
2696  if (CP->isMachineConstantPoolEntry())
2697  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2698  CP->getAlignment());
2699  else
2700  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2701  CP->getAlignment());
2702  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2703 }
2704 
2705 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2706  return MachineJumpTableInfo::EK_Inline;
2707 }
2708 
2709 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2710  SelectionDAG &DAG) const {
2711  MachineFunction &MF = DAG.getMachineFunction();
2712  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2713  unsigned ARMPCLabelIndex = 0;
2714  SDLoc DL(Op);
2715  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2716  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2717  SDValue CPAddr;
2718  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2719  if (!IsPositionIndependent) {
2720  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2721  } else {
2722  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2723  ARMPCLabelIndex = AFI->createPICLabelUId();
2724  ARMConstantPoolValue *CPV =
2725  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2726  ARMCP::CPBlockAddress, PCAdj);
2727  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2728  }
2729  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2730  SDValue Result = DAG.getLoad(
2731  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2732  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2733  if (!IsPositionIndependent)
2734  return Result;
2735  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2736  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2737 }
2738 
2739 /// \brief Convert a TLS address reference into the correct sequence of loads
2740 /// and calls to compute the variable's address for Darwin, and return an
2741 /// SDValue containing the final node.
2742 
2743 /// Darwin only has one TLS scheme which must be capable of dealing with the
2744 /// fully general situation, in the worst case. This means:
2745 /// + "extern __thread" declaration.
2746 /// + Defined in a possibly unknown dynamic library.
2747 ///
2748 /// The general system is that each __thread variable has a [3 x i32] descriptor
2749 /// which contains information used by the runtime to calculate the address. The
2750 /// only part of this the compiler needs to know about is the first word, which
2751 /// contains a function pointer that must be called with the address of the
2752 /// entire descriptor in "r0".
2753 ///
2754 /// Since this descriptor may be in a different unit, in general access must
2755 /// proceed along the usual ARM rules. A common sequence to produce is:
2756 ///
2757 /// movw rT1, :lower16:_var$non_lazy_ptr
2758 /// movt rT1, :upper16:_var$non_lazy_ptr
2759 /// ldr r0, [rT1]
2760 /// ldr rT2, [r0]
2761 /// blx rT2
2762 /// [...address now in r0...]
2763 SDValue
2764 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2765  SelectionDAG &DAG) const {
2766  assert(Subtarget->isTargetDarwin() &&
2767  "This function expects a Darwin target");
2768  SDLoc DL(Op);
2769 
2770  // The first step is to get the address of the actual global symbol. This is where
2771  // the TLS descriptor lives.
2772  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2773 
2774  // The first entry in the descriptor is a function pointer that we must call
2775  // to obtain the address of the variable.
2776  SDValue Chain = DAG.getEntryNode();
2777  SDValue FuncTLVGet = DAG.getLoad(
2778  MVT::i32, DL, Chain, DescAddr,
2779  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2780  /* Alignment = */ 4,
2781  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2782  MachineMemOperand::MOInvariant);
2783  Chain = FuncTLVGet.getValue(1);
2784 
2785  MachineFunction &F = DAG.getMachineFunction();
2786  MachineFrameInfo &MFI = F.getFrameInfo();
2787  MFI.setAdjustsStack(true);
2788 
2789  // TLS calls preserve all registers except those that absolutely must be
2790  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2791  // silly).
2792  auto TRI =
2793  getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2794  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2795  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2796 
2797  // Finally, we can make the call. This is just a degenerate version of a
2798  // normal ARM call node: r0 takes the address of the descriptor, and
2799  // returns the address of the variable in this thread.
2800  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2801  Chain =
2802  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2803  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2804  DAG.getRegisterMask(Mask), Chain.getValue(1));
2805  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2806 }
2807 
2808 SDValue
2809 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2810  SelectionDAG &DAG) const {
2811  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2812 
2813  SDValue Chain = DAG.getEntryNode();
2814  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2815  SDLoc DL(Op);
2816 
2817  // Load the current TEB (thread environment block)
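  // The constants below encode "mrc p15, #0, <Rt>, c13, c0, #2", i.e. a read
  // of TPIDRURW, the CP15 register that holds the TEB pointer on Windows.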
2818  SDValue Ops[] = {Chain,
2819  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2820  DAG.getConstant(15, DL, MVT::i32),
2821  DAG.getConstant(0, DL, MVT::i32),
2822  DAG.getConstant(13, DL, MVT::i32),
2823  DAG.getConstant(0, DL, MVT::i32),
2824  DAG.getConstant(2, DL, MVT::i32)};
2825  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2826  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2827 
2828  SDValue TEB = CurrentTEB.getValue(0);
2829  Chain = CurrentTEB.getValue(1);
2830 
2831  // Load the ThreadLocalStoragePointer from the TEB
2832  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2833  SDValue TLSArray =
2834  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2835  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2836 
2837  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2838  // offset into the TLSArray.
2839 
2840  // Load the TLS index from the C runtime
2841  SDValue TLSIndex =
2842  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2843  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2844  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2845 
2846  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2847  DAG.getConstant(2, DL, MVT::i32));
2848  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2849  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2850  MachinePointerInfo());
2851 
2852  // Get the offset of the start of the .tls section (section base)
2853  const auto *GA = cast<GlobalAddressSDNode>(Op);
2854  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2855  SDValue Offset = DAG.getLoad(
2856  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2857  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2858  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2859 
2860  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2861 }
2862 
2863 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2864 SDValue
2865 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2866  SelectionDAG &DAG) const {
2867  SDLoc dl(GA);
2868  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2869  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2870  MachineFunction &MF = DAG.getMachineFunction();
2871  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2872  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2873  ARMConstantPoolValue *CPV =
2874  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2875  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2876  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2877  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2878  Argument = DAG.getLoad(
2879  PtrVT, dl, DAG.getEntryNode(), Argument,
2880  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2881  SDValue Chain = Argument.getValue(1);
2882 
2883  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2884  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2885 
2886  // call __tls_get_addr.
2887  ArgListTy Args;
2888  ArgListEntry Entry;
2889  Entry.Node = Argument;
2890  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2891  Args.push_back(Entry);
2892 
2893  // FIXME: is there useful debug info available here?
2894  TargetLowering::CallLoweringInfo CLI(DAG);
2895  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2896  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2897  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2898 
2899  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2900  return CallResult.first;
2901 }
2902 
2903 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2904 // "local exec" model.
2905 SDValue
2906 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2907  SelectionDAG &DAG,
2908  TLSModel::Model model) const {
2909  const GlobalValue *GV = GA->getGlobal();
2910  SDLoc dl(GA);
2911  SDValue Offset;
2912  SDValue Chain = DAG.getEntryNode();
2913  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2914  // Get the Thread Pointer
2915  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2916 
2917  if (model == TLSModel::InitialExec) {
2918  MachineFunction &MF = DAG.getMachineFunction();
2919  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2920  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2921  // Initial exec model.
2922  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2923  ARMConstantPoolValue *CPV =
2924  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2925  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2926  true);
2927  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2928  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2929  Offset = DAG.getLoad(
2930  PtrVT, dl, Chain, Offset,
2931  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2932  Chain = Offset.getValue(1);
2933 
2934  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2935  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2936 
2937  Offset = DAG.getLoad(
2938  PtrVT, dl, Chain, Offset,
2939  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2940  } else {
2941  // local exec model
2942  assert(model == TLSModel::LocalExec);
2943  ARMConstantPoolValue *CPV =
2944  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2945  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2946  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2947  Offset = DAG.getLoad(
2948  PtrVT, dl, Chain, Offset,
2949  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2950  }
2951 
2952  // The address of the thread local variable is the add of the thread
2953  // pointer with the offset of the variable.
2954  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2955 }
2956 
2957 SDValue
2958 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2959  if (Subtarget->isTargetDarwin())
2960  return LowerGlobalTLSAddressDarwin(Op, DAG);
2961 
2962  if (Subtarget->isTargetWindows())
2963  return LowerGlobalTLSAddressWindows(Op, DAG);
2964 
2965  // TODO: implement the "local dynamic" model
2966  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2967  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2968  if (DAG.getTarget().Options.EmulatedTLS)
2969  return LowerToTLSEmulatedModel(GA, DAG);
2970 
2971  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2972 
2973  switch (model) {
2974  case TLSModel::GeneralDynamic:
2975  case TLSModel::LocalDynamic:
2976  return LowerToTLSGeneralDynamicModel(GA, DAG);
2977  case TLSModel::InitialExec:
2978  case TLSModel::LocalExec:
2979  return LowerToTLSExecModels(GA, DAG, model);
2980  }
2981  llvm_unreachable("bogus TLS model");
2982 }
2983 
2984 /// Return true if all users of V are within function F, looking through
2985 /// ConstantExprs.
2986 static bool allUsersAreInFunction(const Value *V, const Function *F) {
2987  SmallVector<const User*,4> Worklist;
2988  for (auto *U : V->users())
2989  Worklist.push_back(U);
2990  while (!Worklist.empty()) {
2991  auto *U = Worklist.pop_back_val();
2992  if (isa<ConstantExpr>(U)) {
2993  for (auto *UU : U->users())
2994  Worklist.push_back(UU);
2995  continue;
2996  }
2997 
2998  auto *I = dyn_cast<Instruction>(U);
2999  if (!I || I->getParent()->getParent() != F)
3000  return false;
3001  }
3002  return true;
3003 }
3004 
3005 /// Return true if all users of V are within some (any) function, looking through
3006 /// ConstantExprs. In other words, are there any global constant users?
3007 static bool allUsersAreInFunctions(const Value *V) {
3008  SmallVector<const User*,4> Worklist;
3009  for (auto *U : V->users())
3010  Worklist.push_back(U);
3011  while (!Worklist.empty()) {
3012  auto *U = Worklist.pop_back_val();
3013  if (isa<ConstantExpr>(U)) {
3014  for (auto *UU : U->users())
3015  Worklist.push_back(UU);
3016  continue;
3017  }
3018 
3019  if (!isa<Instruction>(U))
3020  return false;
3021  }
3022  return true;
3023 }
3024 
3025 // Return true if T is an integer, float or an array/vector of either.
3026 static bool isSimpleType(Type *T) {
3027  if (T->isIntegerTy() || T->isFloatingPointTy())
3028  return true;
3029  Type *SubT = nullptr;
3030  if (T->isArrayTy())
3031  SubT = T->getArrayElementType();
3032  else if (T->isVectorTy())
3033  SubT = T->getVectorElementType();
3034  else
3035  return false;
3036  return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3037 }
3038 
3039 static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3040  EVT PtrVT, const SDLoc &dl) {
3041  // If we're creating a pool entry for a constant global with unnamed address,
3042  // and the global is small enough, we can emit it inline into the constant pool
3043  // to save ourselves an indirection.
3044  //
3045  // This is a win if the constant is only used in one function (so it doesn't
3046  // need to be duplicated) or duplicating the constant wouldn't increase code
3047  // size (implying the constant is no larger than 4 bytes).
3048  const Function *F = DAG.getMachineFunction().getFunction();
3049 
3050  // We rely on this decision to inline being idempotent and unrelated to the
3051  // use-site. We know that if we inline a variable at one use site, we'll
3052  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3053  // doesn't know about this optimization, so bail out if it's enabled;
3054  // otherwise we could decide to inline here (and thus never emit the GV) while
3055  // fast-isel generated code still requires the GV.
3056  if (!EnableConstpoolPromotion ||
3057  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3058  return SDValue();
3059 
3060  auto *GVar = dyn_cast<GlobalVariable>(GV);
3061  if (!GVar || !GVar->hasInitializer() ||
3062  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3063  !GVar->hasLocalLinkage())
3064  return SDValue();
3065 
3066  // Ensure that we don't try and inline any type that contains pointers. If
3067  // we inline a value that contains relocations, we move the relocations from
3068  // .data to .text which is not ideal.
3069  auto *Init = GVar->getInitializer();
3070  if (!isSimpleType(Init->getType()))
3071  return SDValue();
3072 
3073  // The constant islands pass can only really deal with alignment requests
3074  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3075  // any type wanting greater alignment requirements than 4 bytes. We also
3076  // can only promote constants that are multiples of 4 bytes in size or
3077  // are paddable to a multiple of 4. Currently we only try and pad constants
3078  // that are strings for simplicity.
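  // For example, a 10-byte string gives Size % 4 == 2 and RequiredPadding == 2,
  // so two NUL bytes are appended below and PaddedSize becomes 12; a size that
  // is already a multiple of 4 yields RequiredPadding == 4, meaning no padding.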
3079  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3080  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3081  unsigned Align = GVar->getAlignment();
3082  unsigned RequiredPadding = 4 - (Size % 4);
3083  bool PaddingPossible =
3084  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3085  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3086  Size == 0)
3087  return SDValue();
3088 
3089  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3090  MachineFunction &MF = DAG.getMachineFunction();
3091  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3092 
3093  // We can't bloat the constant pool too much, else the ConstantIslands pass
3094  // may fail to converge. If we haven't promoted this global yet (it may have
3095  // multiple uses), and promoting it would increase the constant pool size (Sz
3096  // > 4), ensure we have space to do so up to MaxTotal.
3097  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3098  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3099  ConstpoolPromotionMaxTotal)
3100  return SDValue();
3101 
3102  // This is only valid if all users are in a single function OR it has users
3103  // in multiple functions but is no larger than a pointer. We also check if
3104  // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3105  // address taken.
3106  if (!allUsersAreInFunction(GVar, F) &&
3107  !(Size <= 4 && allUsersAreInFunctions(GVar)))
3108  return SDValue();
3109 
3110  // We're going to inline this global. Pad it out if needed.
3111  if (RequiredPadding != 4) {
3112  StringRef S = CDAInit->getAsString();
3113 
3114  SmallVector<uint8_t,16> V(S.size());
3115  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3116  while (RequiredPadding--)
3117  V.push_back(0);
3118  Init = ConstantDataArray::get(*DAG.getContext(), V);
3119  }
3120 
3121  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3122  SDValue CPAddr =
3123  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3124  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3125  AFI->markGlobalAsPromotedToConstantPool(GVar);
3126  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3127  PaddedSize - 4);
3128  }
3129  ++NumConstpoolPromoted;
3130  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3131 }
3132 
3133 static bool isReadOnly(const GlobalValue *GV) {
3134  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3135  GV = GA->getBaseObject();
3136  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3137  isa<Function>(GV);
3138 }
3139 
3140 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3141  SelectionDAG &DAG) const {
3142  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3143  default: llvm_unreachable("unknown object format");
3144  case Triple::COFF:
3145  return LowerGlobalAddressWindows(Op, DAG);
3146  case Triple::ELF:
3147  return LowerGlobalAddressELF(Op, DAG);
3148  case Triple::MachO:
3149  return LowerGlobalAddressDarwin(Op, DAG);
3150  }
3151 }
3152 
3153 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3154  SelectionDAG &DAG) const {
3155  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3156  SDLoc dl(Op);
3157  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3158  const TargetMachine &TM = getTargetMachine();
3159  bool IsRO = isReadOnly(GV);
3160 
3161  // promoteToConstantPool only if not generating XO text section
3162  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3163  if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3164  return V;
3165 
3166  if (isPositionIndependent()) {
3167  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3168  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3169  UseGOT_PREL ? ARMII::MO_GOT : 0);
3170  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3171  if (UseGOT_PREL)
3172  Result =
3173  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3174  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3175  return Result;
3176  } else if (Subtarget->isROPI() && IsRO) {
3177  // PC-relative.
3178  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3179  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3180  return Result;
3181  } else if (Subtarget->isRWPI() && !IsRO) {
3182  // SB-relative.
3183  SDValue RelAddr;
3184  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3185  ++NumMovwMovt;
3186  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3187  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3188  } else { // use literal pool for address constant
3189  ARMConstantPoolValue *CPV =
3190  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3191  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3192  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3193  RelAddr = DAG.getLoad(
3194  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3195  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3196  }
3197  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3198  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3199  return Result;
3200  }
3201 
3202  // If we have T2 ops, we can materialize the address directly via movt/movw
3203  // pair. This is always cheaper.
3204  if (Subtarget->useMovt(DAG.getMachineFunction())) {
3205  ++NumMovwMovt;
3206  // FIXME: Once remat is capable of dealing with instructions with register
3207  // operands, expand this into two nodes.
3208  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3209  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3210  } else {
3211  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3212  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3213  return DAG.getLoad(
3214  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3215  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3216  }
3217 }
3218 
3219 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3220  SelectionDAG &DAG) const {
3221  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3222  "ROPI/RWPI not currently supported for Darwin");
3223  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3224  SDLoc dl(Op);
3225  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3226 
3227  if (Subtarget->useMovt(DAG.getMachineFunction()))
3228  ++NumMovwMovt;
3229 
3230  // FIXME: Once remat is capable of dealing with instructions with register
3231  // operands, expand this into multiple nodes
3232  unsigned Wrapper =
3233  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3234 
3235  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3236  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3237 
3238  if (Subtarget->isGVIndirectSymbol(GV))
3239  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3240  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3241  return Result;
3242 }
3243 
3244 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3245  SelectionDAG &DAG) const {
3246  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3247  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3248  "Windows on ARM expects to use movw/movt");
3249  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3250  "ROPI/RWPI not currently supported for Windows");
3251 
3252  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3253  const ARMII::TOF TargetFlags =
3254  (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3255  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3256  SDValue Result;
3257  SDLoc DL(Op);
3258 
3259  ++NumMovwMovt;
3260 
3261  // FIXME: Once remat is capable of dealing with instructions with register
3262  // operands, expand this into two nodes.
3263  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3264  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3265  TargetFlags));
3266  if (GV->hasDLLImportStorageClass())
3267  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3268  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3269  return Result;
3270 }
3271 
3272 SDValue
3273 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3274  SDLoc dl(Op);
3275  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3276  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3277  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3278  Op.getOperand(1), Val);
3279 }
3280 
3281 SDValue
3282 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3283  SDLoc dl(Op);
3284  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3285  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3286 }
3287 
3288 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3289  SelectionDAG &DAG) const {
3290  SDLoc dl(Op);
3291  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3292  Op.getOperand(0));
3293 }
3294 
3295 SDValue
3296 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3297  const ARMSubtarget *Subtarget) const {
3298  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3299  SDLoc dl(Op);
3300  switch (IntNo) {
3301  default: return SDValue(); // Don't custom lower most intrinsics.
3302  case Intrinsic::thread_pointer: {
3303  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3304  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3305  }
3306  case Intrinsic::eh_sjlj_lsda: {
3307  MachineFunction &MF = DAG.getMachineFunction();
3308  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3309  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3310  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3311  SDValue CPAddr;
3312  bool IsPositionIndependent = isPositionIndependent();
3313  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3314  ARMConstantPoolValue *CPV =
3315  ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3316  ARMCP::CPLSDA, PCAdj);
3317  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3318  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3319  SDValue Result = DAG.getLoad(
3320  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3321  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3322 
3323  if (IsPositionIndependent) {
3324  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3325  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3326  }
3327  return Result;
3328  }
3329  case Intrinsic::arm_neon_vabs:
3330  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3331  Op.getOperand(1));
3332  case Intrinsic::arm_neon_vmulls:
3333  case Intrinsic::arm_neon_vmullu: {
3334  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3335  ? ARMISD::VMULLs : ARMISD::VMULLu;
3336  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3337  Op.getOperand(1), Op.getOperand(2));
3338  }
3339  case Intrinsic::arm_neon_vminnm:
3340  case Intrinsic::arm_neon_vmaxnm: {
3341  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3342  ? ISD::FMINNUM : ISD::FMAXNUM;
3343  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3344  Op.getOperand(1), Op.getOperand(2));
3345  }
3346  case Intrinsic::arm_neon_vminu:
3347  case Intrinsic::arm_neon_vmaxu: {
3348  if (Op.getValueType().isFloatingPoint())
3349  return SDValue();
3350  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3351  ? ISD::UMIN : ISD::UMAX;
3352  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3353  Op.getOperand(1), Op.getOperand(2));
3354  }
3355  case Intrinsic::arm_neon_vmins:
3356  case Intrinsic::arm_neon_vmaxs: {
3357  // v{min,max}s is overloaded between signed integers and floats.
3358  if (!Op.getValueType().isFloatingPoint()) {
3359  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3360  ? ISD::SMIN : ISD::SMAX;
3361  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3362  Op.getOperand(1), Op.getOperand(2));
3363  }
3364  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3365  ? ISD::FMINNAN : ISD::FMAXNAN;
3366  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3367  Op.getOperand(1), Op.getOperand(2));
3368  }
3369  case Intrinsic::arm_neon_vtbl1:
3370  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3371  Op.getOperand(1), Op.getOperand(2));
3372  case Intrinsic::arm_neon_vtbl2:
3373  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3374  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3375  }
3376 }
3377 
3378 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3379  const ARMSubtarget *Subtarget) {
3380  SDLoc dl(Op);
3381  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3382  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3383  if (SSID == SyncScope::SingleThread)
3384  return Op;
3385 
3386  if (!Subtarget->hasDataBarrier()) {
3387  // Some ARMv6 cpus can support data barriers with an mcr instruction.
3388  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3389  // here.
3390  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3391  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3392  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3393  DAG.getConstant(0, dl, MVT::i32));
3394  }
3395 
3396  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3397  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3398  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3399  if (Subtarget->isMClass()) {
3400  // Only a full system barrier exists in the M-class architectures.
3401  Domain = ARM_MB::SY;
3402  } else if (Subtarget->preferISHSTBarriers() &&
3403  Ord == AtomicOrdering::Release) {
3404  // Swift happens to implement ISHST barriers in a way that's compatible with
3405  // Release semantics but weaker than ISH so we'd be fools not to use
3406  // it. Beware: other processors probably don't!
3407  Domain = ARM_MB::ISHST;
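  // In other words, a release fence on such a core lowers to "dmb ishst"
  // rather than the heavier "dmb ish" (a sketch of the intent; the exact
  // assembly depends on how the dmb operand is encoded later).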
3408  }
3409 
3410  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3411  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3412  DAG.getConstant(Domain, dl, MVT::i32));
3413 }
3414 
3415 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3416  const ARMSubtarget *Subtarget) {
3417  // ARM pre v5TE and Thumb1 do not have preload instructions.
3418  if (!(Subtarget->isThumb2() ||
3419  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3420  // Just preserve the chain.
3421  return Op.getOperand(0);
3422 
3423  SDLoc dl(Op);
3424  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3425  if (!isRead &&
3426  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3427  // ARMv7 with MP extension has PLDW.
3428  return Op.getOperand(0);
3429 
3430  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3431  if (Subtarget->isThumb()) {
3432  // Invert the bits.
3433  isRead = ~isRead & 1;
3434  isData = ~isData & 1;
3435  }
3436 
3437  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3438  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3439  DAG.getConstant(isData, dl, MVT::i32));
3440 }
3441 
3442 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3443  MachineFunction &MF = DAG.getMachineFunction();
3444  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3445 
3446  // vastart just stores the address of the VarArgsFrameIndex slot into the
3447  // memory location argument.
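  // For example, for "va_list ap; va_start(ap, last);" the pointer written
  // into ap is the address of the VarArgsFrameIndex slot that was set up by
  // VarArgStyleRegisters during LowerFormalArguments.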
3448  SDLoc dl(Op);
3449  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3450  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3451  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3452  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3453  MachinePointerInfo(SV));
3454 }
3455 
3456 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3457  CCValAssign &NextVA,
3458  SDValue &Root,
3459  SelectionDAG &DAG,
3460  const SDLoc &dl) const {
3461  MachineFunction &MF = DAG.getMachineFunction();
3462  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3463 
3464  const TargetRegisterClass *RC;
3465  if (AFI->isThumb1OnlyFunction())
3466  RC = &ARM::tGPRRegClass;
3467  else
3468  RC = &ARM::GPRRegClass;
3469 
3470  // Transform the arguments stored in physical registers into virtual ones.
3471  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3472  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3473 
3474  SDValue ArgValue2;
3475  if (NextVA.isMemLoc()) {
3476  MachineFrameInfo &MFI = MF.getFrameInfo();
3477  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3478 
3479  // Create load node to retrieve arguments from the stack.
3480  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3481  ArgValue2 = DAG.getLoad(
3482  MVT::i32, dl, Root, FIN,
3483  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3484  } else {
3485  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3486  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3487  }
3488  if (!Subtarget->isLittle())
3489  std::swap (ArgValue, ArgValue2);
3490  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3491 }
3492 
3493 // The remaining GPRs hold either the beginning of variable-argument
3494 // data, or the beginning of an aggregate passed by value (usually
3495 // byval). Either way, we allocate stack slots adjacent to the data
3496 // provided by our caller, and store the unallocated registers there.
3497 // If this is a variadic function, the va_list pointer will begin with
3498 // these values; otherwise, this reassembles a (byval) structure that
3499 // was split between registers and memory.
3500 // Return: the frame index that the registers were stored into.
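// Illustrative sketch (hypothetical C signature, assuming AAPCS assignment):
//   struct S { int a, b, c, d, e; };   // 20 bytes
//   void f(int x, struct S s);         // s passed byval
// x takes r0 and s is split across r1-r3 plus 8 bytes of stack; this routine
// spills r1-r3 next to that stack tail so the callee sees one contiguous S.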
3501 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3502  const SDLoc &dl, SDValue &Chain,
3503  const Value *OrigArg,
3504  unsigned InRegsParamRecordIdx,
3505  int ArgOffset, unsigned ArgSize) const {
3506  // Currently, two use-cases are possible:
3507  // Case #1. Non-var-args function, and we meet the first byval parameter.
3508  // Set up the first unallocated register as the first byval register;
3509  // eat all remaining registers
3510  // (these two actions are performed by the HandleByVal method).
3511  // Then, here, we initialize the stack frame with
3512  // "store-reg" instructions.
3513  // Case #2. Var-args function that doesn't contain byval parameters.
3514  // The same: eat all remaining unallocated registers,
3515  // initialize the stack frame.
3516 
3517  MachineFunction &MF = DAG.getMachineFunction();
3518  MachineFrameInfo &MFI = MF.getFrameInfo();
3519  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3520  unsigned RBegin, REnd;
3521  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3522  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3523  } else {
3524  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3525  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3526  REnd = ARM::R4;
3527  }
3528 
3529  if (REnd != RBegin)
3530  ArgOffset = -4 * (ARM::R4 - RBegin);
3531 
3532  auto PtrVT = getPointerTy(DAG.getDataLayout());
3533  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3534  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3535 
3536  SmallVector<SDValue, 4> MemOps;
3537  const TargetRegisterClass *RC =
3538  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3539 
3540  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3541  unsigned VReg = MF.addLiveIn(Reg, RC);
3542  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3543  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3544  MachinePointerInfo(OrigArg, 4 * i));
3545  MemOps.push_back(Store);
3546  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3547  }
3548 
3549  if (!MemOps.empty())
3550  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3551  return FrameIndex;
3552 }
3553 
3554 // Set up the stack frame that the va_list pointer will start from.
3555 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3556  const SDLoc &dl, SDValue &Chain,
3557  unsigned ArgOffset,
3558  unsigned TotalArgRegsSaveSize,
3559  bool ForceMutable) const {
3560  MachineFunction &MF = DAG.getMachineFunction();
3561  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3562 
3563  // Try to store any remaining integer argument regs
3564  // to their spots on the stack so that they may be loaded by dereferencing
3565  // the result of va_next.
3566  // If there are no regs to be stored, just point the address after the
3567  // last argument passed via the stack.
3568  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3569  CCInfo.getInRegsParamsCount(),
3570  CCInfo.getNextStackOffset(), 4);
3571  AFI->setVarArgsFrameIndex(FrameIndex);
3572 }
3573 
3574 SDValue ARMTargetLowering::LowerFormalArguments(
3575  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3576  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3577  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3578  MachineFunction &MF = DAG.getMachineFunction();
3579  MachineFrameInfo &MFI = MF.getFrameInfo();
3580 
3581  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3582 
3583  // Assign locations to all of the incoming arguments.
3584  SmallVector<CCValAssign, 16> ArgLocs;
3585  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3586  *DAG.getContext());
3587  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3588 
3589  SmallVector<SDValue, 16> ArgValues;
3590  SDValue ArgValue;
3591  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3592  unsigned CurArgIdx = 0;
3593 
3594  // Initially ArgRegsSaveSize is zero.
3595  // Then we increase this value each time we meet a byval parameter.
3596  // We also increase this value in the case of a varargs function.
3597  AFI->setArgRegsSaveSize(0);
3598 
3599  // Calculate the amount of stack space that we need to allocate to store
3600  // byval and variadic arguments that are passed in registers.
3601  // We need to know this before we allocate the first byval or variadic
3602  // argument, as they will be allocated a stack slot below the CFA (Canonical
3603  // Frame Address, the stack pointer at entry to the function).
3604  unsigned ArgRegBegin = ARM::R4;
3605  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3606  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3607  break;
3608 
3609  CCValAssign &VA = ArgLocs[i];
3610  unsigned Index = VA.getValNo();
3611  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3612  if (!Flags.isByVal())
3613  continue;
3614 
3615  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3616  unsigned RBegin, REnd;
3617  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3618  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3619 
3620  CCInfo.nextInRegsParam();
3621  }
3622  CCInfo.rewindByValRegsInfo();
3623 
3624  int lastInsIndex = -1;
3625  if (isVarArg && MFI.hasVAStart()) {
3626  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3627  if (RegIdx != array_lengthof(GPRArgRegs))
3628  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3629  }
3630 
3631  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
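  // (For example, if ArgRegBegin were ARM::R2, this reserves 4 * 2 = 8 bytes
  // so that r2 and r3 can be saved below the CFA.)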
3632  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3633  auto PtrVT = getPointerTy(DAG.getDataLayout());
3634 
3635  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3636  CCValAssign &VA = ArgLocs[i];
3637  if (Ins[VA.getValNo()].isOrigArg()) {
3638  std::advance(CurOrigArg,
3639  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3640  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3641  }
3642  // Arguments stored in registers.
3643  if (VA.isRegLoc()) {
3644  EVT RegVT = VA.getLocVT();
3645 
3646  if (VA.needsCustom()) {
3647  // f64 and vector types are split up into multiple registers or
3648  // combinations of registers and stack slots.
3649  if (VA.getLocVT() == MVT::v2f64) {
3650  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3651  Chain, DAG, dl);
3652  VA = ArgLocs[++i]; // skip ahead to next loc
3653  SDValue ArgValue2;
3654  if (VA.isMemLoc()) {
3655  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3656  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3657  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3658  MachinePointerInfo::getFixedStack(
3659  DAG.getMachineFunction(), FI));
3660  } else {
3661  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3662  Chain, DAG, dl);
3663  }
3664  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3665  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3666  ArgValue, ArgValue1,
3667  DAG.getIntPtrConstant(0, dl));
3668  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3669  ArgValue, ArgValue2,
3670  DAG.getIntPtrConstant(1, dl));
3671  } else
3672  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3673  } else {
3674  const TargetRegisterClass *RC;
3675 
3676  if (RegVT == MVT::f32)
3677  RC = &ARM::SPRRegClass;
3678  else if (RegVT == MVT::f64)
3679  RC = &ARM::DPRRegClass;
3680  else if (RegVT == MVT::v2f64)
3681  RC = &ARM::QPRRegClass;
3682  else if (RegVT == MVT::i32)
3683  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3684  : &ARM::GPRRegClass;
3685  else
3686  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3687 
3688  // Transform the arguments in physical registers into virtual ones.
3689  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3690  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3691  }
3692 
3693  // If this is an 8- or 16-bit value, it is really passed promoted
3694  // to 32 bits. Insert an assert[sz]ext to capture this, then
3695  // truncate to the right size.
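  // For example, an i8 argument sign-extended by the caller arrives in a GPR
  // as i32; it is modelled as (truncate i8 (AssertSext i32 %reg, i8)) so that
  // later passes know the upper 24 bits are copies of the sign bit.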
3696  switch (VA.getLocInfo()) {
3697  default: llvm_unreachable("Unknown loc info!");
3698  case CCValAssign::Full: break;
3699  case CCValAssign::BCvt:
3700  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3701  break;
3702  case CCValAssign::SExt:
3703  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3704  DAG.getValueType(VA.getValVT()));
3705  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3706  break;
3707  case CCValAssign::ZExt:
3708  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3709  DAG.getValueType(VA.getValVT()));
3710  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3711  break;
3712  }
3713 
3714  InVals.push_back(ArgValue);
3715  } else { // VA.isRegLoc()
3716  // sanity check
3717  assert(VA.isMemLoc());
3718  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3719 
3720  int index = VA.getValNo();
3721 
3722  // Some Ins[] entries become multiple ArgLoc[] entries.
3723  // Process them only once.
3724  if (index != lastInsIndex)
3725  {
3726  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3727  // FIXME: For now, all byval parameter objects are marked mutable.
3728  // This can be changed with more analysis.
3729  // In the case of tail call optimization, mark all arguments mutable,
3730  // since they could be overwritten by the lowering of arguments in case
3731  // of a tail call.
3732  if (Flags.isByVal()) {
3733  assert(Ins[index].isOrigArg() &&
3734  "Byval arguments cannot be implicit");
3735  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3736 
3737  int FrameIndex = StoreByValRegs(
3738  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3739  VA.getLocMemOffset(), Flags.getByValSize());
3740  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3741  CCInfo.nextInRegsParam();
3742  } else {
3743  unsigned FIOffset = VA.getLocMemOffset();
3744  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3745  FIOffset, true);
3746 
3747  // Create load nodes to retrieve arguments from the stack.
3748  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3749  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3750  MachinePointerInfo::getFixedStack(
3751  DAG.getMachineFunction(), FI)));
3752  }
3753  lastInsIndex = index;
3754  }
3755  }
3756  }
3757 
3758  // varargs
3759  if (isVarArg && MFI.hasVAStart())
3760  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3761  CCInfo.getNextStackOffset(),
3762  TotalArgRegsSaveSize);
3763 
3763 
3764  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3765 
3766  return Chain;
3767 }
3768 
3769 /// isFloatingPointZero - Return true if this is +0.0.
3770 static bool isFloatingPointZero(SDValue Op) {
3771  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3772  return CFP->getValueAPF().isPosZero();
3773  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3774  // Maybe this has already been legalized into the constant pool?
3775  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3776  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3777  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3778  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3779  return CFP->getValueAPF().isPosZero();
3780  }
3781  } else if (Op->getOpcode() == ISD::BITCAST &&
3782  Op->getValueType(0) == MVT::f64) {
3783  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3784  // created by LowerConstantFP().
3785  SDValue BitcastOp = Op->getOperand(0);
3786  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3787  isNullConstant(BitcastOp->getOperand(0)))
3788  return true;
3789  }
3790  return false;
3791 }
3792 
3793 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
3794 /// the given operands.
3795 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3796  SDValue &ARMcc, SelectionDAG &DAG,
3797  const SDLoc &dl) const {
3798  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3799  unsigned C = RHSC->getZExtValue();
3800  if (!isLegalICmpImmediate(C)) {
3801  // Constant does not fit, try adjusting it by one?
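  // For example (hypothetical constant, assuming the ARM modified-immediate
  // encoding): (x <s 0x10001) cannot encode 0x10001, but the equivalent
  // (x <=s 0x10000) can, since 0x10000 is a rotated 8-bit value; hence
  // SETLT C becomes SETLE C-1 below.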
3802  switch (CC) {
3803  default: break;
3804  case ISD::SETLT:
3805  case ISD::SETGE:
3806  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3807  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3808  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3809  }
3810  break;
3811  case ISD::SETULT:
3812  case ISD::SETUGE:
3813  if (C != 0 && isLegalICmpImmediate(C-1)) {
3814  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3815  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3816  }
3817  break;
3818  case ISD::SETLE:
3819  case ISD::SETGT:
3820  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3821  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3822  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3823  }
3824  break;
3825  case ISD::SETULE:
3826  case ISD::SETUGT:
3827  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3828  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3829  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3830  }
3831  break;
3832  }
3833  }
3834  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3835  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3836  // In ARM and Thumb-2, the compare instructions can shift their second
3837  // operand.
3838  CC = ISD::getSetCCSwappedOperands(CC);
3839  std::swap(LHS, RHS);
3840  }
3841 
3842  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3843  ARMISD::NodeType CompareType;
3844  switch (CondCode) {
3845  default:
3846  CompareType = ARMISD::CMP;
3847  break;
3848  case ARMCC::EQ:
3849  case ARMCC::NE:
3850  // Uses only Z Flag
3851  CompareType = ARMISD::CMPZ;
3852  break;
3853  }
3854  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3855  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3856 }
3857 
3858 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3859 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3860  SelectionDAG &DAG, const SDLoc &dl,
3861  bool InvalidOnQNaN) const {
3862  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3863  SDValue Cmp;
3864  SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3865  if (!isFloatingPointZero(RHS))
3866  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3867  else
3868  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3869  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3870 }
3871 
3872 /// duplicateCmp - Glue values can have only one use, so this function
3873 /// duplicates a comparison node.
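/// For example, when LowerSELECT reuses the flags of an existing CMOV node,
/// the glue produced by the original compare is already consumed, so a fresh
/// copy of the compare is materialized here.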
3874 SDValue
3875 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3876  unsigned Opc = Cmp.getOpcode();
3877  SDLoc DL(Cmp);
3878  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3879  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3880 
3881  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3882  Cmp = Cmp.getOperand(0);
3883  Opc = Cmp.getOpcode();
3884  if (Opc == ARMISD::CMPFP)
3885  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3886  Cmp.getOperand(1), Cmp.getOperand(2));
3887  else {
3888  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3889  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3890  Cmp.getOperand(1));
3891  }
3892  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3893 }
3894 
3895 std::pair<SDValue, SDValue>
3896 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3897  SDValue &ARMcc) const {
3898  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3899 
3900  SDValue Value, OverflowCmp;
3901  SDValue LHS = Op.getOperand(0);
3902  SDValue RHS = Op.getOperand(1);
3903  SDLoc dl(Op);
3904 
3905  // FIXME: We are currently always generating CMPs because we don't support
3906  // generating CMN through the backend. This is not as good as the natural
3907  // CMP case because it causes a register dependency and cannot be folded
3908  // later.
3909 
3910  switch (Op.getOpcode()) {
3911  default:
3912  llvm_unreachable("Unknown overflow instruction!");
3913  case ISD::SADDO:
3914  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3915  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3916  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3917  break;
3918  case ISD::UADDO:
3919  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3920  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3921  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3922  break;
3923  case ISD::SSUBO:
3924  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3925  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3926  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3927  break;
3928  case ISD::USUBO:
3929  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3930  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3931  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3932  break;
3933  } // switch (...)
3934 
3935  return std::make_pair(Value, OverflowCmp);
3936 }
3937 
3938 SDValue
3939 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3940  // Let legalize expand this if it isn't a legal type yet.
3941  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3942  return SDValue();
3943 
3944  SDValue Value, OverflowCmp;
3945  SDValue ARMcc;
3946  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3947  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3948  SDLoc dl(Op);
3949  // We use 0 and 1 as false and true values.
3950  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3951  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3952  EVT VT = Op.getValueType();
3953 
3954  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3955  ARMcc, CCR, OverflowCmp);
3956 
3957  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3958  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3959 }
3960 
3961 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3962  SDValue Cond = Op.getOperand(0);
3963  SDValue SelectTrue = Op.getOperand(1);
3964  SDValue SelectFalse = Op.getOperand(2);
3965  SDLoc dl(Op);
3966  unsigned Opc = Cond.getOpcode();
3967 
3968  if (Cond.getResNo() == 1 &&
3969  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3970  Opc == ISD::USUBO)) {
3971  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3972  return SDValue();
3973 
3974  SDValue Value, OverflowCmp;
3975  SDValue ARMcc;
3976  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3977  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3978  EVT VT = Op.getValueType();
3979 
3980  return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3981  OverflowCmp, DAG);
3982  }
3983 
3984  // Convert:
3985  //
3986  // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3987  // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3988  //
3989  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3990  const ConstantSDNode *CMOVTrue =
3991  dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3992  const ConstantSDNode *CMOVFalse =
3993  dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3994 
3995  if (CMOVTrue && CMOVFalse) {
3996  unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3997  unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3998 
3999  SDValue True;
4000  SDValue False;
4001  if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4002  True = SelectTrue;
4003  False = SelectFalse;
4004  } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4005  True = SelectFalse;
4006  False = SelectTrue;
4007  }
4008 
4009  if (True.getNode() && False.getNode()) {
4010  EVT VT = Op.getValueType();
4011  SDValue ARMcc = Cond.getOperand(2);
4012  SDValue CCR = Cond.getOperand(3);
4013  SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4014  assert(True.getValueType() == VT);
4015  return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4016  }
4017  }
4018  }
4019 
4020  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4021  // undefined bits before doing a full-word comparison with zero.
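  // For example, a boolean that was any-extended to i32 may carry arbitrary
  // data in bits 31:1 under UndefinedBooleanContent, so only bit 0 is trusted.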
4022  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4023  DAG.getConstant(1, dl, Cond.getValueType()));
4024 
4025  return DAG.getSelectCC(dl, Cond,
4026  DAG.getConstant(0, dl, Cond.getValueType()),
4027  SelectTrue, SelectFalse, ISD::SETNE);
4028 }
4029 
4030 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4031  bool &swpCmpOps, bool &swpVselOps) {
4032  // Start by selecting the GE condition code for opcodes that return true for
4033  // 'equality'
4034  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4035  CC == ISD::SETULE)
4036  CondCode = ARMCC::GE;
4037 
4038  // and GT for opcodes that return false for 'equality'.
4039  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4040  CC == ISD::SETULT)
4041  CondCode = ARMCC::GT;
4042 
4043  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4044  // to swap the compare operands.
4045  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4046  CC == ISD::SETULT)
4047  swpCmpOps = true;
4048 
4049  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4050  // If we have an unordered opcode, we need to swap the operands to the VSEL
4051  // instruction (effectively negating the condition).
4052  //
4053  // This also has the effect of swapping which one of 'less' or 'greater'
4054  // returns true, so we also swap the compare operands. It also switches
4055  // whether we return true for 'equality', so we compensate by picking the
4056  // opposite condition code to our original choice.
4057  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4058  CC == ISD::SETUGT) {
4059  swpCmpOps = !swpCmpOps;
4060  swpVselOps = !swpVselOps;
4061  CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4062  }
4063 
4064  // 'ordered' is 'anything but unordered', so use the VS condition code and
4065  // swap the VSEL operands.
4066  if (CC == ISD::SETO) {
4067  CondCode = ARMCC::VS;
4068  swpVselOps = true;
4069  }
4070 
4071  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4072  // code and swap the VSEL operands.
4073  if (CC == ISD::SETUNE) {
4074  CondCode = ARMCC::EQ;
4075  swpVselOps = true;
4076  }
4077 }
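// Rough example of the net effect: for (a setolt b ? x : y) this helper picks
// GT and sets swpCmpOps, so the compare is emitted on (b, a) and the VSEL
// keeps x when GT holds, i.e. exactly when a < b and the operands are ordered.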
4078 
4079 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4080  SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4081  SDValue Cmp, SelectionDAG &DAG) const {
4082  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4083  FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4084  DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4085  TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4086  DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4087 
4088  SDValue TrueLow = TrueVal.getValue(0);
4089  SDValue TrueHigh = TrueVal.getValue(1);
4090  SDValue FalseLow = FalseVal.getValue(0);
4091  SDValue FalseHigh = FalseVal.getValue(1);
4092 
4093  SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4094  ARMcc, CCR, Cmp);
4095  SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4096  ARMcc, CCR, duplicateCmp(Cmp, DAG)