LLVM 19.0.0git
ARMISelLowering.cpp
Go to the documentation of this file.
1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
87#include "llvm/IR/Type.h"
88#include "llvm/IR/User.h"
89#include "llvm/IR/Value.h"
90#include "llvm/MC/MCInstrDesc.h"
93#include "llvm/MC/MCSchedule.h"
100#include "llvm/Support/Debug.h"
108#include <algorithm>
109#include <cassert>
110#include <cstdint>
111#include <cstdlib>
112#include <iterator>
113#include <limits>
114#include <optional>
115#include <tuple>
116#include <utility>
117#include <vector>
118
119using namespace llvm;
120using namespace llvm::PatternMatch;
121
// Debug type used by LLVM_DEBUG / STATISTIC output from this file.
122#define DEBUG_TYPE "arm-isel"
123
// Counters reported under -stats.
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127STATISTIC(NumConstpoolPromoted,
128 "Number of constants with their storage promoted into constant pools");
129
// Hidden debug switch controlling ARM/Thumb interworking; defaults to on.
130static cl::opt<bool>
131ARMInterworking("arm-interworking", cl::Hidden,
132 cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 cl::init(true));
134
// NOTE(review): the cl::opt declaration headers for the next three options
// (original lines 135, 140, 144) are elided in this listing; only their
// argument lists are visible below.
136 "arm-promote-constant", cl::Hidden,
137 cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 "constant pools"),
139 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
141 "arm-promote-constant-max-size", cl::Hidden,
142 cl::desc("Maximum size of constant to promote into a constant pool"),
143 cl::init(64));
145 "arm-promote-constant-max-total", cl::Hidden,
146 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 cl::init(128));
148
// Upper bound on the interleave factor used when forming MVE VLDn groups.
// (Declaration header at original line 149 is elided here.)
150MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 cl::init(2));
153
154// The APCS parameter registers.
// r0-r3 carry the first integer/pointer arguments under APCS/AAPCS.
155static const MCPhysReg GPRArgRegs[] = {
156 ARM::R0, ARM::R1, ARM::R2, ARM::R3
157};
158
// NOTE(review): the signature line of this static helper (original line 159)
// is elided in this listing; only the tail of its parameter list is visible.
// From the body it canonicalizes a value for an argument whose declared type
// is narrower than i32 — confirm name/intent against the full file.
160 SelectionDAG &DAG, const SDLoc &DL) {
// Only meaningful for argument types strictly narrower than i32.
162 assert(Arg.ArgVT.bitsLT(MVT::i32));
// Truncate the incoming value down to the declared argument type...
163 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
164 SDValue Ext =
// ...then extend it back to i32. The line carrying the extension opcode
// (original line 165) is elided here, so sign- vs zero-extend is not visible.
166 MVT::i32, Trunc);
167 return Ext;
168}
169
// Configure legalization actions for the NEON vector type VT. Loads and
// stores of VT are promoted to PromotedLdStVT when the two types differ.
// NOTE(review): several interior source lines are elided in this listing
// (gaps in the embedded numbering); comments below describe only what is
// visible.
170void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
171 if (VT != PromotedLdStVT) {
// Select loads of VT as loads of PromotedLdStVT.
173 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
174
// Likewise for stores.
176 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
177 }
178
// Per-element-type actions; the action bodies for these branches are elided.
179 MVT ElemTy = VT.getVectorElementType();
180 if (ElemTy != MVT::f64)
184 if (ElemTy == MVT::i32) {
189 } else {
194 }
203 if (VT.isInteger()) {
207 }
208
209 // Neon does not support vector divide/remainder operations.
218
// Abs/abs-diff and signed min/max (list continues on an elided line) are
// natively legal, except on the 64-bit-element vector types.
219 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
220 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
222 setOperationAction(Opcode, VT, Legal);
// Saturating add/sub are legal on every integer vector type.
223 if (!VT.isFloatingPoint())
224 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
225 setOperationAction(Opcode, VT, Legal);
226}
227
228void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
229 addRegisterClass(VT, &ARM::DPRRegClass);
230 addTypeForNEON(VT, MVT::f64);
231}
232
233void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
234 addRegisterClass(VT, &ARM::DPairRegClass);
235 addTypeForNEON(VT, MVT::v2f64);
236}
237
// Mark every target-independent operation as Expand for VT, then (per the
// comment below) re-enable a few trivially supportable operations.
// NOTE(review): original lines 245-248 — the statements that re-legalize
// those simple operations — are elided from this listing.
238void ARMTargetLowering::setAllExpand(MVT VT) {
239 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
240 setOperationAction(Opc, VT, Expand);
241
242 // We support these really simple operations even on types where all
243 // the actual arithmetic has to be broken down into simpler
244 // operations or turned into library calls.
249}
250
251void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
252 LegalizeAction Action) {
253 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
254 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
255 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
256}
257
// Register and configure the MVE vector types. HasMVEFP selects whether the
// floating-point subset of MVE is available; without it the FP vector types
// are still registered (for bitcast/load/store) but their arithmetic is
// expanded.
// NOTE(review): this listing elides many original source lines (gaps in the
// embedded numbering) — most setOperationAction bodies inside the loops are
// not visible here.
258void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
// The three legal MVE integer vector widths (128-bit total each).
259 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
260
261 for (auto VT : IntTypes) {
262 addRegisterClass(VT, &ARM::MQPRRegClass);
292
293 // No native support for these.
303
304 // Vector reductions
314
315 if (!HasMVEFP) {
320 } else {
323 }
324
325 // Pre and Post inc are supported on loads and stores
326 for (unsigned im = (unsigned)ISD::PRE_INC;
332 }
333 }
334
// The legal MVE floating-point vector types.
335 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
336 for (auto VT : FloatTypes) {
337 addRegisterClass(VT, &ARM::MQPRRegClass);
// Without MVE.fp, default every operation on these types to Expand first.
338 if (!HasMVEFP)
339 setAllExpand(VT);
340
341 // These are legal or custom whether we have MVE.fp or not
354
355 // Pre and Post inc are supported on loads and stores
356 for (unsigned im = (unsigned)ISD::PRE_INC;
362 }
363
364 if (HasMVEFP) {
372
373 // No native support for these.
388 }
389 }
390
391 // Custom Expand smaller than legal vector reductions to prevent false zero
392 // items being added.
401
402 // We 'support' these types up to bitcast/load/store level, regardless of
403 // MVE integer-only / float support. Only doing FP data processing on the FP
404 // vector types is inhibited at integer-only level.
405 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
406 for (auto VT : LongTypes) {
407 addRegisterClass(VT, &ARM::MQPRRegClass);
408 setAllExpand(VT);
414 }
416
417 // We can do bitwise operations on v2i64 vectors
418 setOperationAction(ISD::AND, MVT::v2i64, Legal);
419 setOperationAction(ISD::OR, MVT::v2i64, Legal);
420 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
421
422 // It is legal to extload from v4i8 to v4i16 or v4i32.
423 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
424 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
425 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
426
427 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
433
434 // Some truncating stores are legal too.
435 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
436 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
437 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
438
439 // Pre and Post inc on these are legal, given the correct extends
440 for (unsigned im = (unsigned)ISD::PRE_INC;
442 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
447 }
448 }
449
450 // Predicate types
// MVE predicates (VPR lanes) are modeled as the i1 vector types below.
451 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
452 for (auto VT : pTypes) {
453 addRegisterClass(VT, &ARM::VCCRRegClass);
468
469 if (!HasMVEFP) {
474 }
475 }
479 setOperationAction(ISD::OR, MVT::v2i1, Expand);
485
494}
495
497 const ARMSubtarget &STI)
498 : TargetLowering(TM), Subtarget(&STI) {
499 RegInfo = Subtarget->getRegisterInfo();
500 Itins = Subtarget->getInstrItineraryData();
501
504
505 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
506 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
507 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
508 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
509 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
510 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
512 }
513
514 if (Subtarget->isTargetMachO()) {
515 // Uses VFP for Thumb libfuncs if available.
516 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
517 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
518 static const struct {
519 const RTLIB::Libcall Op;
520 const char * const Name;
521 const ISD::CondCode Cond;
522 } LibraryCalls[] = {
523 // Single-precision floating-point arithmetic.
524 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
525 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
526 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
527 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
528
529 // Double-precision floating-point arithmetic.
530 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
531 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
532 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
533 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
534
535 // Single-precision comparisons.
536 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
537 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
538 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
539 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
540 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
541 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
542 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
543
544 // Double-precision comparisons.
545 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
546 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
547 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
548 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
549 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
550 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
551 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
552
553 // Floating-point to integer conversions.
554 // i64 conversions are done via library routines even when generating VFP
555 // instructions, so use the same ones.
556 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
557 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
558 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
559 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
560
561 // Conversions between floating types.
562 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
563 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
564
565 // Integer to floating-point conversions.
566 // i64 conversions are done via library routines even when generating VFP
567 // instructions, so use the same ones.
568 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
569 // e.g., __floatunsidf vs. __floatunssidfvfp.
570 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
571 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
572 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
573 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
574 };
575
576 for (const auto &LC : LibraryCalls) {
577 setLibcallName(LC.Op, LC.Name);
578 if (LC.Cond != ISD::SETCC_INVALID)
579 setCmpLibcallCC(LC.Op, LC.Cond);
580 }
581 }
582 }
583
584 // RTLIB
585 if (Subtarget->isAAPCS_ABI() &&
586 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
587 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
588 static const struct {
589 const RTLIB::Libcall Op;
590 const char * const Name;
591 const CallingConv::ID CC;
592 const ISD::CondCode Cond;
593 } LibraryCalls[] = {
594 // Double-precision floating-point arithmetic helper functions
595 // RTABI chapter 4.1.2, Table 2
596 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600
601 // Double-precision floating-point comparison helper functions
602 // RTABI chapter 4.1.2, Table 3
603 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
604 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
605 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
606 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
607 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
608 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
609 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
610
611 // Single-precision floating-point arithmetic helper functions
612 // RTABI chapter 4.1.2, Table 4
613 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617
618 // Single-precision floating-point comparison helper functions
619 // RTABI chapter 4.1.2, Table 5
620 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
621 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
622 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
623 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
624 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
625 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
626 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
627
628 // Floating-point to integer conversions.
629 // RTABI chapter 4.1.2, Table 6
630 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638
639 // Conversions between floating types.
640 // RTABI chapter 4.1.2, Table 7
641 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
642 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644
645 // Integer to floating-point conversions.
646 // RTABI chapter 4.1.2, Table 8
647 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655
656 // Long long helper functions
657 // RTABI chapter 4.2, Table 9
658 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662
663 // Integer division functions
664 // RTABI chapter 4.3.1
665 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
670 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
671 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
672 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
673 };
674
675 for (const auto &LC : LibraryCalls) {
676 setLibcallName(LC.Op, LC.Name);
677 setLibcallCallingConv(LC.Op, LC.CC);
678 if (LC.Cond != ISD::SETCC_INVALID)
679 setCmpLibcallCC(LC.Op, LC.Cond);
680 }
681
682 // EABI dependent RTLIB
683 if (TM.Options.EABIVersion == EABI::EABI4 ||
684 TM.Options.EABIVersion == EABI::EABI5) {
685 static const struct {
686 const RTLIB::Libcall Op;
687 const char *const Name;
688 const CallingConv::ID CC;
689 const ISD::CondCode Cond;
690 } MemOpsLibraryCalls[] = {
691 // Memory operations
692 // RTABI chapter 4.3.4
693 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
694 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
695 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
696 };
697
698 for (const auto &LC : MemOpsLibraryCalls) {
699 setLibcallName(LC.Op, LC.Name);
700 setLibcallCallingConv(LC.Op, LC.CC);
701 if (LC.Cond != ISD::SETCC_INVALID)
702 setCmpLibcallCC(LC.Op, LC.Cond);
703 }
704 }
705 }
706
707 if (Subtarget->isTargetWindows()) {
708 static const struct {
709 const RTLIB::Libcall Op;
710 const char * const Name;
711 const CallingConv::ID CC;
712 } LibraryCalls[] = {
713 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
717 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
718 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
719 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
720 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
721 };
722
723 for (const auto &LC : LibraryCalls) {
724 setLibcallName(LC.Op, LC.Name);
725 setLibcallCallingConv(LC.Op, LC.CC);
726 }
727 }
728
729 // Use divmod compiler-rt calls for iOS 5.0 and later.
730 if (Subtarget->isTargetMachO() &&
731 !(Subtarget->isTargetIOS() &&
732 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
733 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
734 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
735 }
736
737 // The half <-> float conversion functions are always soft-float on
738 // non-watchos platforms, but are needed for some targets which use a
739 // hard-float calling convention by default.
740 if (!Subtarget->isTargetWatchABI()) {
741 if (Subtarget->isAAPCS_ABI()) {
742 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
743 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
744 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
745 } else {
746 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
747 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
748 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
749 }
750 }
751
752 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
753 // a __gnu_ prefix (which is the default).
754 if (Subtarget->isTargetAEABI()) {
755 static const struct {
756 const RTLIB::Libcall Op;
757 const char * const Name;
758 const CallingConv::ID CC;
759 } LibraryCalls[] = {
760 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
761 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
762 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
763 };
764
765 for (const auto &LC : LibraryCalls) {
766 setLibcallName(LC.Op, LC.Name);
767 setLibcallCallingConv(LC.Op, LC.CC);
768 }
769 }
770
771 if (Subtarget->isThumb1Only())
772 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
773 else
774 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
775
776 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
777 Subtarget->hasFPRegs()) {
778 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
779 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
780
785
786 if (!Subtarget->hasVFP2Base())
787 setAllExpand(MVT::f32);
788 if (!Subtarget->hasFP64())
789 setAllExpand(MVT::f64);
790 }
791
792 if (Subtarget->hasFullFP16()) {
793 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
796
799 }
800
801 if (Subtarget->hasBF16()) {
802 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
803 setAllExpand(MVT::bf16);
804 if (!Subtarget->hasFullFP16())
806 }
807
809 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
810 setTruncStoreAction(VT, InnerVT, Expand);
811 addAllExtLoads(VT, InnerVT, Expand);
812 }
813
816
818 }
819
822
825
826 if (Subtarget->hasMVEIntegerOps())
827 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
828
829 // Combine low-overhead loop intrinsics so that we can lower i1 types.
830 if (Subtarget->hasLOB()) {
832 }
833
834 if (Subtarget->hasNEON()) {
835 addDRTypeForNEON(MVT::v2f32);
836 addDRTypeForNEON(MVT::v8i8);
837 addDRTypeForNEON(MVT::v4i16);
838 addDRTypeForNEON(MVT::v2i32);
839 addDRTypeForNEON(MVT::v1i64);
840
841 addQRTypeForNEON(MVT::v4f32);
842 addQRTypeForNEON(MVT::v2f64);
843 addQRTypeForNEON(MVT::v16i8);
844 addQRTypeForNEON(MVT::v8i16);
845 addQRTypeForNEON(MVT::v4i32);
846 addQRTypeForNEON(MVT::v2i64);
847
848 if (Subtarget->hasFullFP16()) {
849 addQRTypeForNEON(MVT::v8f16);
850 addDRTypeForNEON(MVT::v4f16);
851 }
852
853 if (Subtarget->hasBF16()) {
854 addQRTypeForNEON(MVT::v8bf16);
855 addDRTypeForNEON(MVT::v4bf16);
856 }
857 }
858
859 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
860 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
861 // none of Neon, MVE or VFP supports any arithmetic operations on it.
862 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
863 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
864 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
865 // FIXME: Code duplication: FDIV and FREM are expanded always, see
866 // ARMTargetLowering::addTypeForNEON method for details.
867 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
868 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
869 // FIXME: Create unittest.
870 // In another words, find a way when "copysign" appears in DAG with vector
871 // operands.
873 // FIXME: Code duplication: SETCC has custom operation action, see
874 // ARMTargetLowering::addTypeForNEON method for details.
876 // FIXME: Create unittest for FNEG and for FABS.
877 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
878 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
880 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
881 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
882 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
883 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
884 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
887 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
890 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
896 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
897 }
898
899 if (Subtarget->hasNEON()) {
900 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
901 // supported for v4f32.
903 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
904 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
905 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
906 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
907 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
910 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
918
919 // Mark v2f32 intrinsics.
921 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
922 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
923 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
924 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
925 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
928 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
936
937 // Neon does not support some operations on v1i64 and v2i64 types.
938 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
939 // Custom handling for some quad-vector types to detect VMULL.
940 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
941 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
942 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
943 // Custom handling for some vector types to avoid expensive expansions
944 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
946 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
948 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
949 // a destination type that is wider than the source, and nor does
950 // it have a FP_TO_[SU]INT instruction with a narrower destination than
951 // source.
960
963
964 // NEON does not have single instruction CTPOP for vectors with element
965 // types wider than 8-bits. However, custom lowering can leverage the
966 // v8i8/v16i8 vcnt instruction.
973
974 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
975 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
976
977 // NEON does not have single instruction CTTZ for vectors.
979 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
980 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
981 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
982
983 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
984 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
985 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
986 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
987
992
997
1001 }
1002
1003 // NEON only has FMA instructions as of VFP4.
1004 if (!Subtarget->hasVFP4Base()) {
1005 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
1006 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1007 }
1008
1011
1012 // It is legal to extload from v4i8 to v4i16 or v4i32.
1013 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1014 MVT::v2i32}) {
1019 }
1020 }
1021
1022 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1023 MVT::v4i32}) {
1028 }
1029 }
1030
1031 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1038 }
1039 if (Subtarget->hasMVEIntegerOps()) {
1042 ISD::SETCC});
1043 }
1044 if (Subtarget->hasMVEFloatOps()) {
1046 }
1047
1048 if (!Subtarget->hasFP64()) {
1049 // When targeting a floating-point unit with only single-precision
1050 // operations, f64 is legal for the few double-precision instructions which
1051 // are present However, no double-precision operations other than moves,
1052 // loads and stores are provided by the hardware.
1090 }
1091
1092 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1095 if (Subtarget->hasFullFP16()) {
1098 }
1099 }
1100
1101 if (!Subtarget->hasFP16()) {
1104 }
1105
1107
1108 // ARM does not have floating-point extending loads.
1109 for (MVT VT : MVT::fp_valuetypes()) {
1110 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1111 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1112 }
1113
1114 // ... or truncating stores
1115 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1116 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1117 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1118
1119 // ARM does not have i1 sign extending load.
1120 for (MVT VT : MVT::integer_valuetypes())
1121 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1122
1123 // ARM supports all 4 flavors of integer indexed load / store.
1124 if (!Subtarget->isThumb1Only()) {
1125 for (unsigned im = (unsigned)ISD::PRE_INC;
1127 setIndexedLoadAction(im, MVT::i1, Legal);
1128 setIndexedLoadAction(im, MVT::i8, Legal);
1129 setIndexedLoadAction(im, MVT::i16, Legal);
1130 setIndexedLoadAction(im, MVT::i32, Legal);
1131 setIndexedStoreAction(im, MVT::i1, Legal);
1132 setIndexedStoreAction(im, MVT::i8, Legal);
1133 setIndexedStoreAction(im, MVT::i16, Legal);
1134 setIndexedStoreAction(im, MVT::i32, Legal);
1135 }
1136 } else {
1137 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1140 }
1141
1146
1149 if (Subtarget->hasDSP()) {
1158 }
1159 if (Subtarget->hasBaseDSP()) {
1162 }
1163
1164 // i64 operation support.
1167 if (Subtarget->isThumb1Only()) {
1170 }
1171 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1172 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1174
1184
1185 // MVE lowers 64 bit shifts to lsll and lsrl
1186 // assuming that ISD::SRL and SRA of i64 are already marked custom
1187 if (Subtarget->hasMVEIntegerOps())
1189
1190 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1191 if (Subtarget->isThumb1Only()) {
1195 }
1196
1197 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1199
1200 // ARM does not have ROTL.
1205 }
1208 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1211 }
1212
1213 // @llvm.readcyclecounter requires the Performance Monitors extension.
1214 // Default to the 0 expansion on unsupported platforms.
1215 // FIXME: Technically there are older ARM CPUs that have
1216 // implementation-specific ways of obtaining this information.
1217 if (Subtarget->hasPerfMon())
1219
1220 // Only ARMv6 has BSWAP.
1221 if (!Subtarget->hasV6Ops())
1223
1224 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1225 : Subtarget->hasDivideInARMMode();
1226 if (!hasDivide) {
1227 // These are expanded into libcalls if the cpu doesn't have HW divider.
1230 }
1231
1232 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1235
1238 }
1239
1242
1243 // Register based DivRem for AEABI (RTABI 4.2)
1244 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1245 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1246 Subtarget->isTargetWindows()) {
1249 HasStandaloneRem = false;
1250
1251 if (Subtarget->isTargetWindows()) {
1252 const struct {
1253 const RTLIB::Libcall Op;
1254 const char * const Name;
1255 const CallingConv::ID CC;
1256 } LibraryCalls[] = {
1257 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1258 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1259 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1260 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1261
1262 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1263 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1264 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1265 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1266 };
1267
1268 for (const auto &LC : LibraryCalls) {
1269 setLibcallName(LC.Op, LC.Name);
1270 setLibcallCallingConv(LC.Op, LC.CC);
1271 }
1272 } else {
1273 const struct {
1274 const RTLIB::Libcall Op;
1275 const char * const Name;
1276 const CallingConv::ID CC;
1277 } LibraryCalls[] = {
1278 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1279 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1280 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1281 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1282
1283 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1284 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1285 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1286 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1287 };
1288
1289 for (const auto &LC : LibraryCalls) {
1290 setLibcallName(LC.Op, LC.Name);
1291 setLibcallCallingConv(LC.Op, LC.CC);
1292 }
1293 }
1294
1299 } else {
1302 }
1303
1308
1309 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1311
1312 // Use the default implementation.
1314 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1316 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1319
1320 if (Subtarget->isTargetWindows())
1322 else
1324
1325 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1326 // the default expansion.
1327 InsertFencesForAtomic = false;
1328 if (Subtarget->hasAnyDataBarrier() &&
1329 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1330 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1331 // to ldrex/strex loops already.
1333 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1335
1336 // On v8, we have particularly efficient implementations of atomic fences
1337 // if they can be combined with nearby atomic loads and stores.
1338 if (!Subtarget->hasAcquireRelease() ||
1339 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1340 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1341 InsertFencesForAtomic = true;
1342 }
1343 } else {
1344 // If there's anything we can use as a barrier, go through custom lowering
1345 // for ATOMIC_FENCE.
1346 // If target has DMB in thumb, Fences can be inserted.
1347 if (Subtarget->hasDataBarrier())
1348 InsertFencesForAtomic = true;
1349
1351 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1352
1353 // Set them all for libcall, which will force libcalls.
1366 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1367 // Unordered/Monotonic case.
1368 if (!InsertFencesForAtomic) {
1371 }
1372 }
1373
1374 // Compute supported atomic widths.
1375 if (Subtarget->isTargetLinux() ||
1376 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1377 // For targets where __sync_* routines are reliably available, we use them
1378 // if necessary.
1379 //
1380 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1381 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1382 //
1383 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1384 // such targets should provide __sync_* routines, which use the ARM mode
1385 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1386 // encoding; see ARMISD::MEMBARRIER_MCR.)
1388 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1389 Subtarget->hasForced32BitAtomics()) {
1390 // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1392 } else {
1393 // We can't assume anything about other targets; just use libatomic
1394 // routines.
1396 }
1397
1399
1401
1402 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1403 if (!Subtarget->hasV6Ops()) {
1406 }
1408
1409 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1410 !Subtarget->isThumb1Only()) {
1411 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1412 // iff target supports vfp2.
1422 }
1423
1424 // We want to custom lower some of our intrinsics.
1429 if (Subtarget->useSjLjEH())
1430 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1431
1441 if (Subtarget->hasFullFP16()) {
1445 }
1446
1448
1451 if (Subtarget->hasFullFP16())
1455 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1456
1457 // We don't support sin/cos/fmod/copysign/pow
1466 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1467 !Subtarget->isThumb1Only()) {
1470 }
1473
1474 if (!Subtarget->hasVFP4Base()) {
1477 }
1478
1479 // Various VFP goodness
1480 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1481 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1482 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1485 }
1486
1487 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1488 if (!Subtarget->hasFP16()) {
1491 }
1492
1493 // Strict floating-point comparisons need custom lowering.
1500 }
1501
1502 // Use __sincos_stret if available.
1503 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1504 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1507 }
1508
1509 // FP-ARMv8 implements a lot of rounding-like FP operations.
1510 if (Subtarget->hasFPARMv8Base()) {
1519 if (Subtarget->hasNEON()) {
1524 }
1525
1526 if (Subtarget->hasFP64()) {
1535 }
1536 }
1537
1538 // FP16 often need to be promoted to call lib functions
1539 if (Subtarget->hasFullFP16()) {
1554
1556 }
1557
1558 if (Subtarget->hasNEON()) {
1559 // vmin and vmax aren't available in a scalar form, so we can use
1560 // a NEON instruction with an undef lane instead.
1569
1570 if (Subtarget->hasFullFP16()) {
1575
1580 }
1581 }
1582
1583 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1584 // it, but it's just a wrapper around ldexp.
1585 if (Subtarget->isTargetWindows()) {
1587 if (isOperationExpand(Op, MVT::f32))
1588 setOperationAction(Op, MVT::f32, Promote);
1589 }
1590
1591 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1592 // isn't legal.
1594 if (isOperationExpand(Op, MVT::f16))
1595 setOperationAction(Op, MVT::f16, Promote);
1596
1597 // We have target-specific dag combine patterns for the following nodes:
1598 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1601
1602 if (Subtarget->hasMVEIntegerOps())
1604
1605 if (Subtarget->hasV6Ops())
1607 if (Subtarget->isThumb1Only())
1609 // Attempt to lower smin/smax to ssat/usat
1610 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1611 Subtarget->isThumb2()) {
1613 }
1614
1616
1617 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1618 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1620 else
1622
1623 //// temporary - rewrite interface to use type
1626 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1628 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1630
1631 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1632 // are at least 4 bytes aligned.
1634
1635 // Prefer likely predicted branches to selects on out-of-order cores.
1636 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1637
1638 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1640
1641 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1642}
1643
  // Forward directly to the subtarget's soft-float configuration.
  return Subtarget->useSoftFloat();
}
1647
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass *, uint8_t>
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  // Cost models how many "slots" of the representative class one value of
  // this type consumes for register-pressure purposes.
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers so
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    // 128-bit vectors occupy a Q register, i.e. a pair of D registers.
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    // v4i64 spans two consecutive Q registers (four D registers).
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    // v8i64 spans four consecutive Q registers (eight D registers).
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}
1695
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  // Expand every ARMISD enumerator into "case V: return #V;" so debug output
  // prints the enum's own spelling.
#define MAKE_CASE(V)                                                           \
  case V:                                                                      \
    return #V;
  switch ((ARMISD::NodeType)Opcode) {
    break;
#undef MAKE_CASE
  }
  // Opcodes with no ARM-specific name (e.g. target-independent nodes).
  return nullptr;
}
1909
                                           EVT VT) const {
  // Scalar comparisons produce a pointer-sized integer result.
  if (!VT.isVector())
    return getPointerTy(DL);

  // MVE has a predicate register.
  if ((Subtarget->hasMVEIntegerOps() &&
       (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
        VT == MVT::v16i8)) ||
      (Subtarget->hasMVEFloatOps() &&
       (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
    // MVE vector compares yield one i1 predicate lane per element.
    return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
}
1924
1925/// getRegClassFor - Return the register class that should be used for the
1926/// specified value type.
const TargetRegisterClass *
ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
  // Divergence is irrelevant on ARM; the parameter only satisfies the
  // TargetLowering interface.
  (void)isDivergent;
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
  // MVE Q registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  if (Subtarget->hasMVEIntegerOps()) {
    if (VT == MVT::v4i64)
      return &ARM::MQQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::MQQQQPRRegClass;
  }
}
1948
1949// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
1950// source/dest is aligned and the copy size is large enough. We therefore want
1951// to align such objects passed to memory intrinsics.
                                            Align &PrefAlign) const {
  // Only memory intrinsics (memcpy/memmove/memset) benefit from this
  // over-alignment.
  if (!isa<MemIntrinsic>(CI))
    return false;
  // Only request the alignment bump for copies of at least 8 bytes.
  MinSize = 8;
  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
  // cycle faster than 4-byte aligned LDM.
  PrefAlign =
      (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
  return true;
}
1963
1964// Create a fast isel object.
FastISel *
                                   const TargetLibraryInfo *libInfo) const {
  // Delegate to the ARM-specific FastISel factory.
  return ARM::createFastISel(funcInfo, libInfo);
}
1970
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  // Prefer ILP scheduling for nodes that produce FP or vector results.
  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  // High-latency definitions (first def operand cycle > 2) favour ILP.
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
    return Sched::ILP;

  return Sched::RegPressure;
}
2000
2001//===----------------------------------------------------------------------===//
2002// Lowering Code
2003//===----------------------------------------------------------------------===//
2004
2005static bool isSRL16(const SDValue &Op) {
2006 if (Op.getOpcode() != ISD::SRL)
2007 return false;
2008 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2009 return Const->getZExtValue() == 16;
2010 return false;
2011}
2012
2013static bool isSRA16(const SDValue &Op) {
2014 if (Op.getOpcode() != ISD::SRA)
2015 return false;
2016 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2017 return Const->getZExtValue() == 16;
2018 return false;
2019}
2020
2021static bool isSHL16(const SDValue &Op) {
2022 if (Op.getOpcode() != ISD::SHL)
2023 return false;
2024 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2025 return Const->getZExtValue() == 16;
2026 return false;
2027}
2028
// Check for a signed 16-bit value. We special-case SRA because it makes
// things simpler when also looking for SRAs that aren't sign extending a
// smaller value. Without the check, we'd need to take extra care with
// checking order for some operations.
2033static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2034 if (isSRA16(Op))
2035 return isSHL16(Op.getOperand(0));
2036 return DAG.ComputeNumSignBits(Op) == 17;
2037}
2038
2039/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
  // Each integer condition maps one-to-one onto an ARM condition code;
  // unsigned comparisons use the carry-based HI/HS/LO/LS encodings.
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}
2055
2056/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
                        ARMCC::CondCodes &CondCode2) {
  // Most FP conditions need a single ARM condition; a few unordered-aware
  // ones require a second check, returned in CondCode2 (AL means "unused").
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  // Ordered-not-equal needs both MI and GT to exclude the unordered case.
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  // Unordered-or-equal needs EQ plus VS (overflow flag signals unordered).
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}
2084
2085//===----------------------------------------------------------------------===//
2086// Calling Convention Implementation
2087//===----------------------------------------------------------------------===//
2088
2089/// getEffectiveCallingConv - Get the effective calling convention, taking into
2090/// account presence of floating point hardware and calling convention
2091/// limitations, such as support for variadic functions.
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
                                           bool isVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention");
  // GHC (and similar explicit conventions) are used as-is.
  case CallingConv::GHC:
    return CC;
  case CallingConv::Swift:
  case CallingConv::C:
  case CallingConv::Tail:
    // Pre-AAPCS targets always use the old APCS convention.
    if (!Subtarget->isAAPCS_ABI())
      return CallingConv::ARM_APCS;
    // Hard-float AAPCS (VFP registers) requires FP registers, non-Thumb1,
    // a hard float ABI, and a non-variadic call.
    else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
    else
  case CallingConv::Fast:
    if (!Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
        return CallingConv::Fast;
      return CallingConv::ARM_APCS;
    } else if (Subtarget->hasVFP2Base() &&
               !Subtarget->isThumb1Only() && !isVarArg)
    else
  }
}
2134
                                                 bool isVarArg) const {
  // Return=false selects the argument-passing (not return-value) routine.
  return CCAssignFnForNode(CC, false, isVarArg);
}
2139
                                                   bool isVarArg) const {
  // Return=true selects the return-value (not argument-passing) routine.
  return CCAssignFnForNode(CC, true, isVarArg);
}
2144
2145/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2146/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  // Dispatch on the *effective* convention so the ABI / FP-hardware
  // adjustments made by getEffectiveCallingConv are respected.
  switch (getEffectiveCallingConv(CC, isVarArg)) {
  default:
    report_fatal_error("Unsupported calling convention");
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::Fast:
    return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  }
}
2171
2172SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2173 MVT LocVT, MVT ValVT, SDValue Val) const {
2174 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2175 Val);
2176 if (Subtarget->hasFullFP16()) {
2177 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2178 } else {
2179 Val = DAG.getNode(ISD::TRUNCATE, dl,
2180 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2181 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2182 }
2183 return Val;
2184}
2185
2186SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2187 MVT LocVT, MVT ValVT,
2188 SDValue Val) const {
2189 if (Subtarget->hasFullFP16()) {
2190 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2191 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2192 } else {
2193 Val = DAG.getNode(ISD::BITCAST, dl,
2194 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2195 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2196 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2197 }
2198 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2199}
2200
2201/// LowerCallResult - Lower the result values of a call into the
2202/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
    SDValue ThisVal, bool isCmseNSCall) const {
  // Assign locations to each value returned by this call.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass 'this' value directly from the argument to return value, to avoid
    // reg unit interference
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    SDValue Val;
    if (VA.needsCustom() &&
        (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
      // Handle f64 or half of a v2f64: the value comes back as two i32
      // halves that must be reassembled with VMOVDRR.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Lo.getValue(1);
      InGlue = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Hi.getValue(1);
      InGlue = Hi.getValue(2);
      // Big-endian targets deliver the halves in the opposite order.
      if (!Subtarget->isLittle())
        std::swap (Lo, Hi);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        // Insert the first f64 as lane 0, then repeat the two-register copy
        // for the second f64 and insert it as lane 1.
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, dl, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
        Chain = Lo.getValue(1);
        InGlue = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
        Chain = Hi.getValue(1);
        InGlue = Hi.getValue(2);
        if (!Subtarget->isLittle())
          std::swap (Lo, Hi);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, dl, MVT::i32));
      }
    } else {
      // Common case: the value comes back in a single register.
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InGlue);
      Chain = Val.getValue(1);
      InGlue = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    // f16 arguments have their size extended to 4 bytes and passed as if they
    // had been copied to the LSBs of a 32-bit register.
    // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
    if (VA.needsCustom() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
      Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);

    // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
    // is less than 32 bits must be sign- or zero-extended after the call for
    // security reasons. Although the ABI mandates an extension done by the
    // callee, the latter cannot be trusted to follow the rules of the ABI.
    const ISD::InputArg &Arg = Ins[VA.getValNo()];
    if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
        VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
      Val = handleCMSEValue(Val, Arg, DAG, dl);

    InVals.push_back(Val);
  }

  return Chain;
}
2299
// Compute the address (and matching MachinePointerInfo) at which an outgoing
// stack argument described by VA should be stored.
std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
    const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
    bool IsTailCall, int SPDiff) const {
  SDValue DstAddr;
  MachinePointerInfo DstInfo;
  int32_t Offset = VA.getLocMemOffset();

  if (IsTailCall) {
    // Tail calls reuse the caller's own fixed stack slots, offset by SPDiff
    // (the byte difference between the caller's and callee's argument areas).
    Offset += SPDiff;
    auto PtrVT = getPointerTy(DAG.getDataLayout());
    int Size = VA.getLocVT().getFixedSizeInBits() / 8;
    int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
    DstAddr = DAG.getFrameIndex(FI, PtrVT);
    DstInfo =
  } else {
    // Normal calls address the outgoing argument relative to SP.
    SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
    DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                          StackPtr, PtrOff);
    DstInfo =
  }

  return std::make_pair(DstAddr, DstInfo);
}
2326
// Pass an f64 argument that must be split into two i32 halves: the first half
// always goes in a register (VA); the second (NextVA) goes in a register or,
// if none remain, onto the stack.
void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVectorImpl<SDValue> &MemOpChains,
                                         bool IsTailCall,
                                         int SPDiff) const {
  // Split the f64 into a pair of i32 results with VMOVRRD.
  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  // On big-endian targets the two halves go to registers in swapped order.
  unsigned id = Subtarget->isLittle() ? 0 : 1;
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
  else {
    assert(NextVA.isMemLoc());
    // Lazily materialize the stack pointer the first time a half is spilled.
    if (!StackPtr.getNode())
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,

    SDValue DstAddr;
    MachinePointerInfo DstInfo;
    std::tie(DstAddr, DstInfo) =
        computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
  }
}
2356
// Returns true if tail-call optimization can be *guaranteed* (rather than
// merely attempted) for the given calling convention. Fastcc only qualifies
// when -tailcallopt is in effect.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
}
2361
2362/// LowerCall - Lowering a call into a callseq_start <-
2363/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2364/// nodes.
2365SDValue
2366ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2367 SmallVectorImpl<SDValue> &InVals) const {
2368 SelectionDAG &DAG = CLI.DAG;
2369 SDLoc &dl = CLI.DL;
2371 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2373 SDValue Chain = CLI.Chain;
2374 SDValue Callee = CLI.Callee;
2375 bool &isTailCall = CLI.IsTailCall;
2376 CallingConv::ID CallConv = CLI.CallConv;
2377 bool doesNotRet = CLI.DoesNotReturn;
2378 bool isVarArg = CLI.IsVarArg;
2379
2383 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2384 bool isThisReturn = false;
2385 bool isCmseNSCall = false;
2386 bool isSibCall = false;
2387 bool PreferIndirect = false;
2388 bool GuardWithBTI = false;
2389
2390 // Analyze operands of the call, assigning locations to each operand.
2392 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2393 *DAG.getContext());
2394 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2395
2396 // Lower 'returns_twice' calls to a pseudo-instruction.
2397 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2398 !Subtarget->noBTIAtReturnTwice())
2399 GuardWithBTI = AFI->branchTargetEnforcement();
2400
2401 // Determine whether this is a non-secure function call.
2402 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2403 isCmseNSCall = true;
2404
2405 // Disable tail calls if they're not supported.
2406 if (!Subtarget->supportsTailCall())
2407 isTailCall = false;
2408
2409 // For both the non-secure calls and the returns from a CMSE entry function,
  // the function needs to do some extra work after the call, or before the
  // return, respectively, thus it cannot end with a tail call
2412 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2413 isTailCall = false;
2414
2415 if (isa<GlobalAddressSDNode>(Callee)) {
2416 // If we're optimizing for minimum size and the function is called three or
2417 // more times in this block, we can improve codesize by calling indirectly
2418 // as BLXr has a 16-bit encoding.
2419 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2420 if (CLI.CB) {
2421 auto *BB = CLI.CB->getParent();
2422 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2423 count_if(GV->users(), [&BB](const User *U) {
2424 return isa<Instruction>(U) &&
2425 cast<Instruction>(U)->getParent() == BB;
2426 }) > 2;
2427 }
2428 }
2429 if (isTailCall) {
2430 // Check if it's really possible to do a tail call.
2431 isTailCall =
2432 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2433
2434 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2435 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2436 isSibCall = true;
2437
2438 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2439 // detected sibcalls.
2440 if (isTailCall)
2441 ++NumTailCalls;
2442 }
2443
2444 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2445 report_fatal_error("failed to perform tail call elimination on a call "
2446 "site marked musttail");
2447
2448 // Get a count of how many bytes are to be pushed on the stack.
2449 unsigned NumBytes = CCInfo.getStackSize();
2450
2451 // SPDiff is the byte offset of the call's argument area from the callee's.
2452 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2453 // by this amount for a tail call. In a sibling call it must be 0 because the
2454 // caller will deallocate the entire stack and the callee still expects its
2455 // arguments to begin at SP+0. Completely unused for non-tail calls.
2456 int SPDiff = 0;
2457
2458 if (isTailCall && !isSibCall) {
2459 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2460 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2461
2462 // Since callee will pop argument stack as a tail call, we must keep the
2463 // popped size 16-byte aligned.
2464 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2465 NumBytes = alignTo(NumBytes, StackAlign);
2466
2467 // SPDiff will be negative if this tail call requires more space than we
2468 // would automatically have in our incoming argument space. Positive if we
2469 // can actually shrink the stack.
2470 SPDiff = NumReusableBytes - NumBytes;
2471
2472 // If this call requires more stack than we have available from
2473 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2474 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2475 AFI->setArgRegsSaveSize(-SPDiff);
2476 }
2477
2478 if (isSibCall) {
2479 // For sibling tail calls, memory operands are available in our caller's stack.
2480 NumBytes = 0;
2481 } else {
2482 // Adjust the stack pointer for the new arguments...
2483 // These operations are automatically eliminated by the prolog/epilog pass
2484 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2485 }
2486
2488 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2489
2490 RegsToPassVector RegsToPass;
2491 SmallVector<SDValue, 8> MemOpChains;
2492
2493 // During a tail call, stores to the argument area must happen after all of
2494 // the function's incoming arguments have been loaded because they may alias.
2495 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2496 // there's no point in doing so repeatedly so this tracks whether that's
2497 // happened yet.
2498 bool AfterFormalArgLoads = false;
2499
2500 // Walk the register/memloc assignments, inserting copies/loads. In the case
2501 // of tail call optimization, arguments are handled later.
2502 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2503 i != e;
2504 ++i, ++realArgIdx) {
2505 CCValAssign &VA = ArgLocs[i];
2506 SDValue Arg = OutVals[realArgIdx];
2507 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2508 bool isByVal = Flags.isByVal();
2509
2510 // Promote the value if needed.
2511 switch (VA.getLocInfo()) {
2512 default: llvm_unreachable("Unknown loc info!");
2513 case CCValAssign::Full: break;
2514 case CCValAssign::SExt:
2515 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2516 break;
2517 case CCValAssign::ZExt:
2518 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2519 break;
2520 case CCValAssign::AExt:
2521 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2522 break;
2523 case CCValAssign::BCvt:
2524 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2525 break;
2526 }
2527
2528 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2529 Chain = DAG.getStackArgumentTokenFactor(Chain);
2530 AfterFormalArgLoads = true;
2531 }
2532
2533 // f16 arguments have their size extended to 4 bytes and passed as if they
2534 // had been copied to the LSBs of a 32-bit register.
2535 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2536 if (VA.needsCustom() &&
2537 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2538 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2539 } else {
2540 // f16 arguments could have been extended prior to argument lowering.
2541 // Mask them arguments if this is a CMSE nonsecure call.
2542 auto ArgVT = Outs[realArgIdx].ArgVT;
2543 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2544 auto LocBits = VA.getLocVT().getSizeInBits();
2545 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2546 SDValue Mask =
2547 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2548 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2549 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2550 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2551 }
2552 }
2553
2554 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2555 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2556 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2557 DAG.getConstant(0, dl, MVT::i32));
2558 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2559 DAG.getConstant(1, dl, MVT::i32));
2560
2561 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2562 StackPtr, MemOpChains, isTailCall, SPDiff);
2563
2564 VA = ArgLocs[++i]; // skip ahead to next loc
2565 if (VA.isRegLoc()) {
2566 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2567 StackPtr, MemOpChains, isTailCall, SPDiff);
2568 } else {
2569 assert(VA.isMemLoc());
2570 SDValue DstAddr;
2571 MachinePointerInfo DstInfo;
2572 std::tie(DstAddr, DstInfo) =
2573 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2574 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2575 }
2576 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2577 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2578 StackPtr, MemOpChains, isTailCall, SPDiff);
2579 } else if (VA.isRegLoc()) {
2580 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2581 Outs[0].VT == MVT::i32) {
2582 assert(VA.getLocVT() == MVT::i32 &&
2583 "unexpected calling convention register assignment");
2584 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2585 "unexpected use of 'returned'");
2586 isThisReturn = true;
2587 }
2588 const TargetOptions &Options = DAG.getTarget().Options;
2589 if (Options.EmitCallSiteInfo)
2590 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2591 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2592 } else if (isByVal) {
2593 assert(VA.isMemLoc());
2594 unsigned offset = 0;
2595
2596 // True if this byval aggregate will be split between registers
2597 // and memory.
2598 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2599 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2600
2601 if (CurByValIdx < ByValArgsCount) {
2602
2603 unsigned RegBegin, RegEnd;
2604 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2605
2606 EVT PtrVT =
2608 unsigned int i, j;
2609 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2610 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2611 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2612 SDValue Load =
2613 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2614 DAG.InferPtrAlign(AddArg));
2615 MemOpChains.push_back(Load.getValue(1));
2616 RegsToPass.push_back(std::make_pair(j, Load));
2617 }
2618
2619 // If parameter size outsides register area, "offset" value
2620 // helps us to calculate stack slot for remained part properly.
2621 offset = RegEnd - RegBegin;
2622
2623 CCInfo.nextInRegsParam();
2624 }
2625
2626 if (Flags.getByValSize() > 4*offset) {
2627 auto PtrVT = getPointerTy(DAG.getDataLayout());
2628 SDValue Dst;
2629 MachinePointerInfo DstInfo;
2630 std::tie(Dst, DstInfo) =
2631 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2632 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2633 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2634 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2635 MVT::i32);
2636 SDValue AlignNode =
2637 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2638
2639 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2640 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2641 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2642 Ops));
2643 }
2644 } else {
2645 assert(VA.isMemLoc());
2646 SDValue DstAddr;
2647 MachinePointerInfo DstInfo;
2648 std::tie(DstAddr, DstInfo) =
2649 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2650
2651 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2652 MemOpChains.push_back(Store);
2653 }
2654 }
2655
2656 if (!MemOpChains.empty())
2657 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2658
2659 // Build a sequence of copy-to-reg nodes chained together with token chain
2660 // and flag operands which copy the outgoing args into the appropriate regs.
2661 SDValue InGlue;
2662 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2663 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2664 RegsToPass[i].second, InGlue);
2665 InGlue = Chain.getValue(1);
2666 }
2667
2668 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2669 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2670 // node so that legalize doesn't hack it.
2671 bool isDirect = false;
2672
2674 const GlobalValue *GVal = nullptr;
2675 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2676 GVal = G->getGlobal();
2677 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2678
2679 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2680 bool isLocalARMFunc = false;
2681 auto PtrVt = getPointerTy(DAG.getDataLayout());
2682
2683 if (Subtarget->genLongCalls()) {
2684 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2685 "long-calls codegen is not position independent!");
2686 // Handle a global address or an external symbol. If it's not one of
2687 // those, the target's already in a register, so we don't need to do
2688 // anything extra.
2689 if (isa<GlobalAddressSDNode>(Callee)) {
2690 if (Subtarget->genExecuteOnly()) {
2691 if (Subtarget->useMovt())
2692 ++NumMovwMovt;
2693 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2694 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2695 } else {
2696 // Create a constant pool entry for the callee address
2697 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2699 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2700
2701 // Get the address of the callee into a register
2702 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2703 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2704 Callee = DAG.getLoad(
2705 PtrVt, dl, DAG.getEntryNode(), Addr,
2707 }
2708 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2709 const char *Sym = S->getSymbol();
2710
2711 if (Subtarget->genExecuteOnly()) {
2712 if (Subtarget->useMovt())
2713 ++NumMovwMovt;
2714 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2715 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2716 } else {
2717 // Create a constant pool entry for the callee address
2718 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2720 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2721
2722 // Get the address of the callee into a register
2723 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2724 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2725 Callee = DAG.getLoad(
2726 PtrVt, dl, DAG.getEntryNode(), Addr,
2728 }
2729 }
2730 } else if (isa<GlobalAddressSDNode>(Callee)) {
2731 if (!PreferIndirect) {
2732 isDirect = true;
2733 bool isDef = GVal->isStrongDefinitionForLinker();
2734
2735 // ARM call to a local ARM function is predicable.
2736 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2737 // tBX takes a register source operand.
2738 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2739 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2740 Callee = DAG.getNode(
2741 ARMISD::WrapperPIC, dl, PtrVt,
2742 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2743 Callee = DAG.getLoad(
2744 PtrVt, dl, DAG.getEntryNode(), Callee,
2748 } else if (Subtarget->isTargetCOFF()) {
2749 assert(Subtarget->isTargetWindows() &&
2750 "Windows is the only supported COFF target");
2751 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2752 if (GVal->hasDLLImportStorageClass())
2753 TargetFlags = ARMII::MO_DLLIMPORT;
2754 else if (!TM.shouldAssumeDSOLocal(GVal))
2755 TargetFlags = ARMII::MO_COFFSTUB;
2756 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2757 TargetFlags);
2758 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2759 Callee =
2760 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2761 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2763 } else {
2764 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2765 }
2766 }
2767 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2768 isDirect = true;
2769 // tBX takes a register source operand.
2770 const char *Sym = S->getSymbol();
2771 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2772 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2775 ARMPCLabelIndex, 4);
2776 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2777 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2778 Callee = DAG.getLoad(
2779 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2781 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2782 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2783 } else {
2784 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2785 }
2786 }
2787
2788 if (isCmseNSCall) {
2789 assert(!isARMFunc && !isDirect &&
2790 "Cannot handle call to ARM function or direct call");
2791 if (NumBytes > 0) {
2793 "call to non-secure function would "
2794 "require passing arguments on stack",
2795 dl.getDebugLoc());
2796 DAG.getContext()->diagnose(Diag);
2797 }
2798 if (isStructRet) {
2801 "call to non-secure function would return value through pointer",
2802 dl.getDebugLoc());
2803 DAG.getContext()->diagnose(Diag);
2804 }
2805 }
2806
2807 // FIXME: handle tail calls differently.
2808 unsigned CallOpc;
2809 if (Subtarget->isThumb()) {
2810 if (GuardWithBTI)
2811 CallOpc = ARMISD::t2CALL_BTI;
2812 else if (isCmseNSCall)
2813 CallOpc = ARMISD::tSECALL;
2814 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2815 CallOpc = ARMISD::CALL_NOLINK;
2816 else
2817 CallOpc = ARMISD::CALL;
2818 } else {
2819 if (!isDirect && !Subtarget->hasV5TOps())
2820 CallOpc = ARMISD::CALL_NOLINK;
2821 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2822 // Emit regular call when code size is the priority
2823 !Subtarget->hasMinSize())
2824 // "mov lr, pc; b _foo" to avoid confusing the RSP
2825 CallOpc = ARMISD::CALL_NOLINK;
2826 else
2827 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2828 }
2829
2830 // We don't usually want to end the call-sequence here because we would tidy
2831 // the frame up *after* the call, however in the ABI-changing tail-call case
2832 // we've carefully laid out the parameters so that when sp is reset they'll be
2833 // in the correct location.
2834 if (isTailCall && !isSibCall) {
2835 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2836 InGlue = Chain.getValue(1);
2837 }
2838
2839 std::vector<SDValue> Ops;
2840 Ops.push_back(Chain);
2841 Ops.push_back(Callee);
2842
2843 if (isTailCall) {
2844 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2845 }
2846
2847 // Add argument registers to the end of the list so that they are known live
2848 // into the call.
2849 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2850 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2851 RegsToPass[i].second.getValueType()));
2852
2853 // Add a register mask operand representing the call-preserved registers.
2854 const uint32_t *Mask;
2855 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2856 if (isThisReturn) {
2857 // For 'this' returns, use the R0-preserving mask if applicable
2858 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2859 if (!Mask) {
2860 // Set isThisReturn to false if the calling convention is not one that
2861 // allows 'returned' to be modeled in this way, so LowerCallResult does
2862 // not try to pass 'this' straight through
2863 isThisReturn = false;
2864 Mask = ARI->getCallPreservedMask(MF, CallConv);
2865 }
2866 } else
2867 Mask = ARI->getCallPreservedMask(MF, CallConv);
2868
2869 assert(Mask && "Missing call preserved mask for calling convention");
2870 Ops.push_back(DAG.getRegisterMask(Mask));
2871
2872 if (InGlue.getNode())
2873 Ops.push_back(InGlue);
2874
2875 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2876 if (isTailCall) {
2878 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2879 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2880 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2881 return Ret;
2882 }
2883
2884 // Returns a chain and a flag for retval copy to use.
2885 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2886 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2887 InGlue = Chain.getValue(1);
2888 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2889
2890 // If we're guaranteeing tail-calls will be honoured, the callee must
2891 // pop its own argument stack on return. But this call is *not* a tail call so
2892 // we need to undo that after it returns to restore the status-quo.
2893 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2894 uint64_t CalleePopBytes =
2895 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2896
2897 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2898 if (!Ins.empty())
2899 InGlue = Chain.getValue(1);
2900
2901 // Handle result values, copying them out of physregs into vregs that we
2902 // return.
2903 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2904 InVals, isThisReturn,
2905 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
2906}
2907
2908/// HandleByVal - Every parameter *after* a byval parameter is passed
2909/// on the stack. Remember the next parameter register to allocate,
2910/// and then confiscate the rest of the parameter registers to insure
2911/// this.
2912void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2913 Align Alignment) const {
2914 // Byval (as with any stack) slots are always at least 4 byte aligned.
2915 Alignment = std::max(Alignment, Align(4));
2916
2917 unsigned Reg = State->AllocateReg(GPRArgRegs);
2918 if (!Reg)
2919 return;
2920
2921 unsigned AlignInRegs = Alignment.value() / 4;
2922 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2923 for (unsigned i = 0; i < Waste; ++i)
2924 Reg = State->AllocateReg(GPRArgRegs);
2925
2926 if (!Reg)
2927 return;
2928
2929 unsigned Excess = 4 * (ARM::R4 - Reg);
2930
2931 // Special case when NSAA != SP and parameter size greater than size of
2932 // all remained GPR regs. In that case we can't split parameter, we must
2933 // send it to stack. We also must set NCRN to R4, so waste all
2934 // remained registers.
2935 const unsigned NSAAOffset = State->getStackSize();
2936 if (NSAAOffset != 0 && Size > Excess) {
2937 while (State->AllocateReg(GPRArgRegs))
2938 ;
2939 return;
2940 }
2941
2942 // First register for byval parameter is the first register that wasn't
2943 // allocated before this method call, so it would be "reg".
2944 // If parameter is small enough to be saved in range [reg, r4), then
2945 // the end (first after last) register would be reg + param-size-in-regs,
2946 // else parameter would be splitted between registers and stack,
2947 // end register would be r4 in this case.
2948 unsigned ByValRegBegin = Reg;
2949 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2950 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2951 // Note, first register is allocated in the beginning of function already,
2952 // allocate remained amount of registers we need.
2953 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2954 State->AllocateReg(GPRArgRegs);
2955 // A byval parameter that is split between registers and memory needs its
2956 // size truncated here.
2957 // In the case where the entire structure fits in registers, we set the
2958 // size in memory to zero.
2959 Size = std::max<int>(Size - Excess, 0);
2960}
2961
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
///
/// Used by tail-call eligibility checking: if the outgoing argument is just
/// a reload of the caller's own fixed incoming-argument slot at the same
/// offset and size, no store is needed for it.
static
                         const TargetInstrInfo *TII) {
  // Size of the outgoing argument in bytes; must match the slot size exactly.
  unsigned Bytes = Arg.getValueSizeInBits() / 8;
  // Sentinel: FI must be overwritten by one of the branches below.
  int FI = std::numeric_limits<int>::max();
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    // The value reaches us via a vreg; it only matches if the vreg's sole
    // definition is a reload from a stack slot (which sets FI).
    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR.isVirtual())
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      // A byval that reaches here in a register cannot be matched in place.
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    // A direct load: it matches only if its address is a frame index.
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != std::numeric_limits<int>::max());
  // Only fixed objects (incoming-argument slots) are eligible, and the slot
  // must sit at exactly the required offset with exactly the required size.
  if (!MFI.isFixedObjectIndex(FI))
    return false;
  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}
3005
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function. Note that this function also
/// processes musttail calls, so when this function returns false on a valid
/// musttail call, a fatal backend error occurs.
///
/// The checks cover: register pressure for indirect calls, interrupt
/// handlers, struct-return mismatches, weak external callees, result- and
/// register-mask compatibility between the two calling conventions, split
/// byval/vararg arguments, and in-place layout of stack arguments.
bool ARMTargetLowering::IsEligibleForTailCallOptimization(
    SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
  CallingConv::ID CalleeCC = CLI.CallConv;
  SDValue Callee = CLI.Callee;
  bool isVarArg = CLI.IsVarArg;
  const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  const SelectionDAG &DAG = CLI.DAG;
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  assert(Subtarget->supportsTailCall());

  // Indirect tail calls cannot be optimized for Thumb1 if the args
  // to the call take up r0-r3. The reason is that there are no legal registers
  // left to hold the pointer to the function to be called.
  // Similarly, if the function uses return address sign and authentication,
  // r12 is needed to hold the PAC and is not available to hold the callee
  // address.
  if (Outs.size() >= 4 &&
      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
    if (Subtarget->isThumb1Only())
      return false;
    // Conservatively assume the function spills LR.
      return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Exception-handling functions need a special set of instructions to indicate
  // a return to the hardware. Tail-calling another function would probably
  // break this.
  if (CallerF.hasFnAttribute("interrupt"))
    return false;

  // Under GuaranteedTailCallOpt the only requirement is that caller and
  // callee use the same (TCO-capable) calling convention.
  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerCC;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called on ARM when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
      return false;
  }

  // Check that the call results are passed in the same way.
  LLVMContext &C = *DAG.getContext();
      getEffectiveCallingConv(CalleeCC, isVarArg),
      getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
      CCAssignFnForReturn(CalleeCC, isVarArg),
      CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // If Caller's vararg or byval argument has been split between registers and
  // stack, do not perform tail call, since part of the argument is in caller's
  // local frame.
  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
  if (AFI_Caller->getArgRegsSaveSize())
    return false;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    if (CCInfo.getStackSize()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const TargetInstrInfo *TII = Subtarget->getInstrInfo();
      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
           i != e;
           ++i, ++realArgIdx) {
        CCValAssign &VA = ArgLocs[i];
        EVT RegVT = VA.getLocVT();
        SDValue Arg = OutVals[realArgIdx];
        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
          return false;
        if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
          // f64 and vector types are split into multiple registers or
          // register/stack-slot combinations. The types will not match
          // the registers; give up on memory f64 refs until we figure
          // out what to do about this.
          if (!VA.isRegLoc())
            return false;
          if (!ArgLocs[++i].isRegLoc())
            return false;
          if (RegVT == MVT::v2f64) {
            if (!ArgLocs[++i].isRegLoc())
              return false;
            if (!ArgLocs[++i].isRegLoc())
              return false;
          }
        } else if (!VA.isRegLoc()) {
                                   MFI, MRI, TII))
            return false;
        }
      }
    }

    // Stack arguments matched in place; finally make sure the callee does not
    // clobber any callee-saved register the caller expects to be preserved.
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  return true;
}
3148
/// CanLowerReturn - Return true if every outgoing return value can be
/// assigned a location by the return calling convention; when this returns
/// false, the middle end demotes the return to an sret argument instead.
bool
ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  LLVMContext &Context) const {
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
}
3158
                                  const SDLoc &DL, SelectionDAG &DAG) {
  const MachineFunction &MF = DAG.getMachineFunction();
  const Function &F = MF.getFunction();

  // The "interrupt" function attribute carries the exception kind, which
  // determines the LR adjustment needed on return (see table below).
  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();

  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
  // version of the "preferred return address". These offsets affect the return
  // instruction if this is a return from PL1 without hypervisor extensions.
  // IRQ/FIQ: +4 "subs pc, lr, #4"
  // SWI: 0 "subs pc, lr, #0"
  // ABORT: +4 "subs pc, lr, #4"
  // UNDEF: +4/+2 "subs pc, lr, #0"
  // UNDEF varies depending on where the exception came from ARM or Thumb
  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.

  int64_t LROffset;
  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
      IntKind == "ABORT")
    LROffset = 4;
  else if (IntKind == "SWI" || IntKind == "UNDEF")
    LROffset = 0;
  else
    report_fatal_error("Unsupported interrupt attribute. If present, value "
                       "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");

  // Insert the LR offset as operand #1 (right after the chain) so the
  // interrupt-return node knows how much to subtract from LR.
  RetOps.insert(RetOps.begin() + 1,
                DAG.getConstant(LROffset, DL, MVT::i32, false));

  return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
}
3191
/// LowerReturn - Lower the outgoing return values into CopyToReg nodes
/// targeting the ABI-assigned physical registers and build the final ARM
/// return node (with special handling for f16 masking on CMSE entry
/// functions, f64/v2f64 register-pair splitting, and interrupt returns).
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to a location.

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));

  SDValue Glue;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  bool isLittleEndian = Subtarget->isLittle();

  AFI->setReturnRegsCount(RVLocs.size());

 // Report error if cmse entry function returns structure through first ptr arg.
  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
    // Note: using an empty SDLoc(), as the first line of the function is a
    // better place to report than the last line.
        "secure entry function would return value through pointer",
        SDLoc().getDebugLoc());
    DAG.getContext()->diagnose(Diag);
  }

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[realRVLocIdx];
    bool ReturnF16 = false;

    if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
      // Half-precision return values can be returned like this:
      //
      // t11 f16 = fadd ...
      // t12: i16 = bitcast t11
      // t13: i32 = zero_extend t12
      // t14: f32 = bitcast t13 <~~~~~~~ Arg
      //
      // to avoid code generation for bitcasts, we simply set Arg to the node
      // that produces the f16 value, t11 in this case.
      //
      if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
        SDValue ZE = Arg.getOperand(0);
        if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
          SDValue BC = ZE.getOperand(0);
          if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
            Arg = BC.getOperand(0);
            ReturnF16 = true;
          }
        }
      }
    }

    // Promote the value to its assigned location type if required.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      if (!ReturnF16)
        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // Mask f16 arguments if this is a CMSE nonsecure entry.
    auto RetVT = Outs[realRVLocIdx].ArgVT;
    if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
      if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
        Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
      } else {
        // Zero the bits above the f16 payload so no secure state leaks to
        // the nonsecure caller: bitcast to integer, AND with a low-bit
        // mask, then bitcast back to the location type.
        auto LocBits = VA.getLocVT().getSizeInBits();
        auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
        SDValue Mask =
            DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
        Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      }
    }

    if (VA.needsCustom() &&
        (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, dl, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain =
            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
        Glue = Chain.getValue(1);
        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain =
            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
        Glue = Chain.getValue(1);
        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, dl, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), Arg);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                               fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                               fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(
        VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
  }
  // If any callee-saved registers are returned via copy for this function,
  // add them as extra return operands too.
  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (ARM::GPRRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i32));
      else if (ARM::DPRRegClass.contains(*I))
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  // CPUs which aren't M-class use a special sequence to return from
  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
  // though we use "subs pc, lr, #N").
  //
  // M-class CPUs actually use a normal return sequence with a special
  // (hardware-provided) value in LR, so the normal code path works.
  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
      !Subtarget->isMClass()) {
    if (Subtarget->isThumb1Only())
      report_fatal_error("interrupt attribute is not supported in Thumb1");
    return LowerInterruptReturn(RetOps, dl, DAG);
  }

  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
}
3368
3369bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3370 if (N->getNumValues() != 1)
3371 return false;
3372 if (!N->hasNUsesOfValue(1, 0))
3373 return false;
3374
3375 SDValue TCChain = Chain;
3376 SDNode *Copy = *N->use_begin();
3377 if (Copy->getOpcode() == ISD::CopyToReg) {
3378 // If the copy has a glue operand, we conservatively assume it isn't safe to
3379 // perform a tail call.
3380 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3381 return false;
3382 TCChain = Copy->getOperand(0);
3383 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3384 SDNode *VMov = Copy;
3385 // f64 returned in a pair of GPRs.
3387 for (SDNode *U : VMov->uses()) {
3388 if (U->getOpcode() != ISD::CopyToReg)
3389 return false;
3390 Copies.insert(U);
3391 }
3392 if (Copies.size() > 2)
3393 return false;
3394
3395 for (SDNode *U : VMov->uses()) {
3396 SDValue UseChain = U->getOperand(0);
3397 if (Copies.count(UseChain.getNode()))
3398 // Second CopyToReg
3399 Copy = U;
3400 else {
3401 // We are at the top of this chain.
3402 // If the copy has a glue operand, we conservatively assume it
3403 // isn't safe to perform a tail call.
3404 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3405 return false;
3406 // First CopyToReg
3407 TCChain = UseChain;
3408 }
3409 }
3410 } else if (Copy->getOpcode() == ISD::BITCAST) {
3411 // f32 returned in a single GPR.
3412 if (!Copy->hasOneUse())
3413 return false;
3414 Copy = *Copy->use_begin();
3415 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3416 return false;
3417 // If the copy has a glue operand, we conservatively assume it isn't safe to
3418 // perform a tail call.
3419 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3420 return false;
3421 TCChain = Copy->getOperand(0);
3422 } else {
3423 return false;
3424 }
3425
3426 bool HasRet = false;
3427 for (const SDNode *U : Copy->uses()) {
3428 if (U->getOpcode() != ARMISD::RET_GLUE &&
3429 U->getOpcode() != ARMISD::INTRET_GLUE)
3430 return false;
3431 HasRet = true;
3432 }
3433
3434 if (!HasRet)
3435 return false;
3436
3437 Chain = TCChain;
3438 return true;
3439}
3440
3441bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3442 if (!Subtarget->supportsTailCall())
3443 return false;
3444
3445 if (!CI->isTailCall())
3446 return false;
3447
3448 return true;
3449}
3450
3451// Trying to write a 64 bit value so need to split into two 32 bit values first,
3452// and pass the lower and high parts through.
3454 SDLoc DL(Op);
3455 SDValue WriteValue = Op->getOperand(2);
3456
3457 // This function is only supposed to be called for i64 type argument.
3458 assert(WriteValue.getValueType() == MVT::i64
3459 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3460
3461 SDValue Lo, Hi;
3462 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3463 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3464 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3465}
3466
3467// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3468// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3469// one of the above mentioned nodes. It has to be wrapped because otherwise
3470// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3471// be used to form addressing mode. These wrapped nodes will be selected
3472// into MOVi.
3473SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3474 SelectionDAG &DAG) const {
3475 EVT PtrVT = Op.getValueType();
3476 // FIXME there is no actual debug info here
3477 SDLoc dl(Op);
3478 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3479 SDValue Res;
3480
3481 // When generating execute-only code Constant Pools must be promoted to the
3482 // global data section. It's a bit ugly that we can't share them across basic
3483 // blocks, but this way we guarantee that execute-only behaves correct with
3484 // position-independent addressing modes.
3485 if (Subtarget->genExecuteOnly()) {
3486 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3487 auto T = const_cast<Type*>(CP->getType());
3488 auto C = const_cast<Constant*>(CP->getConstVal());
3489 auto M = const_cast<Module*>(DAG.getMachineFunction().
3491 auto GV = new GlobalVariable(
3492 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3495 Twine(AFI->createPICLabelUId())
3496 );
3497 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3498 dl, PtrVT);
3499 return LowerGlobalAddress(GA, DAG);
3500 }
3501
3502 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3503 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3504 Align CPAlign = CP->getAlign();
3505 if (Subtarget->isThumb1Only())
3506 CPAlign = std::max(CPAlign, Align(4));
3507 if (CP->isMachineConstantPoolEntry())
3508 Res =
3509 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3510 else
3511 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3512 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3513}
3514
3516 // If we don't have a 32-bit pc-relative branch instruction then the jump
3517 // table consists of block addresses. Usually this is inline, but for
3518 // execute-only it must be placed out-of-line.
3519 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3522}
3523
3524SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3525 SelectionDAG &DAG) const {
3528 unsigned ARMPCLabelIndex = 0;
3529 SDLoc DL(Op);
3530 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3531 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3532 SDValue CPAddr;
3533 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3534 if (!IsPositionIndependent) {
3535 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3536 } else {
3537 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3538 ARMPCLabelIndex = AFI->createPICLabelUId();
3540 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3541 ARMCP::CPBlockAddress, PCAdj);
3542 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3543 }
3544 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3545 SDValue Result = DAG.getLoad(
3546 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3548 if (!IsPositionIndependent)
3549 return Result;
3550 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3551 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3552}
3553
3554/// Convert a TLS address reference into the correct sequence of loads
3555/// and calls to compute the variable's address for Darwin, and return an
3556/// SDValue containing the final node.
3557
3558/// Darwin only has one TLS scheme which must be capable of dealing with the
3559/// fully general situation, in the worst case. This means:
3560/// + "extern __thread" declaration.
3561/// + Defined in a possibly unknown dynamic library.
3562///
3563/// The general system is that each __thread variable has a [3 x i32] descriptor
3564/// which contains information used by the runtime to calculate the address. The
3565/// only part of this the compiler needs to know about is the first word, which
3566/// contains a function pointer that must be called with the address of the
3567/// entire descriptor in "r0".
3568///
3569/// Since this descriptor may be in a different unit, in general access must
3570/// proceed along the usual ARM rules. A common sequence to produce is:
3571///
3572/// movw rT1, :lower16:_var$non_lazy_ptr
3573/// movt rT1, :upper16:_var$non_lazy_ptr
3574/// ldr r0, [rT1]
3575/// ldr rT2, [r0]
3576/// blx rT2
3577/// [...address now in r0...]
3578SDValue
3579ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3580 SelectionDAG &DAG) const {
3581 assert(Subtarget->isTargetDarwin() &&
3582 "This function expects a Darwin target");
3583 SDLoc DL(Op);
3584
3585 // First step is to get the address of the actua global symbol. This is where
3586 // the TLS descriptor lives.
3587 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3588
3589 // The first entry in the descriptor is a function pointer that we must call
3590 // to obtain the address of the variable.
3591 SDValue Chain = DAG.getEntryNode();
3592 SDValue FuncTLVGet = DAG.getLoad(
3593 MVT::i32, DL, Chain, DescAddr,
3597 Chain = FuncTLVGet.getValue(1);
3598
3600 MachineFrameInfo &MFI = F.getFrameInfo();
3601 MFI.setAdjustsStack(true);
3602
3603 // TLS calls preserve all registers except those that absolutely must be
3604 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3605 // silly).
3606 auto TRI =
3608 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3610
3611 // Finally, we can make the call. This is just a degenerate version of a
3612 // normal AArch64 call node: r0 takes the address of the descriptor, and
3613 // returns the address of the variable in this thread.
3614 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3615 Chain =
3616 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3617 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3618 DAG.getRegisterMask(Mask), Chain.getValue(1));
3619 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3620}
3621
3622SDValue
3623ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3624 SelectionDAG &DAG) const {
3625 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3626
3627 SDValue Chain = DAG.getEntryNode();
3628 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3629 SDLoc DL(Op);
3630
3631 // Load the current TEB (thread environment block)
3632 SDValue Ops[] = {Chain,
3633 DAG.getTargetConstant(Intrinsic::arm_mrc,