1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
87#include "llvm/IR/Type.h"
88#include "llvm/IR/User.h"
89#include "llvm/IR/Value.h"
90#include "llvm/MC/MCInstrDesc.h"
93#include "llvm/MC/MCSchedule.h"
100#include "llvm/Support/Debug.h"
108#include <algorithm>
109#include <cassert>
110#include <cstdint>
111#include <cstdlib>
112#include <iterator>
113#include <limits>
114#include <optional>
115#include <tuple>
116#include <utility>
117#include <vector>
118
119using namespace llvm;
120using namespace llvm::PatternMatch;
121
122#define DEBUG_TYPE "arm-isel"
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127STATISTIC(NumConstpoolPromoted,
128 "Number of constants with their storage promoted into constant pools");
129
130static cl::opt<bool>
131ARMInterworking("arm-interworking", cl::Hidden,
132 cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 cl::init(true));
134
136 "arm-promote-constant", cl::Hidden,
137 cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 "constant pools"),
139 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
141 "arm-promote-constant-max-size", cl::Hidden,
142 cl::desc("Maximum size of constant to promote into a constant pool"),
143 cl::init(64));
145 "arm-promote-constant-max-total", cl::Hidden,
146 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 cl::init(128));
148
150MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 cl::init(2));
153
154// The APCS parameter registers.
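// For illustration: both APCS and AAPCS pass the first four integer-sized
// arguments in these registers, e.g. for
//   int f(int a, int b, long long c);
// a is expected in R0, b in R1 and c in the R2/R3 pair, with any further
// arguments going to the stack.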
155static const MCPhysReg GPRArgRegs[] = {
156 ARM::R0, ARM::R1, ARM::R2, ARM::R3
157};
158
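// Re-extends a small integer value on the caller side of a CMSE non-secure
// call. For example, a signed i16 result coming back in R0 is truncated to
// i16 and sign-extended back to i32 here, rather than trusting the callee to
// have performed the extension (see the CMSE note in LowerCallResult).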
159static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg,
160 SelectionDAG &DAG, const SDLoc &DL) {
162 assert(Arg.ArgVT.bitsLT(MVT::i32));
163 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
164 SDValue Ext =
165 DAG.getNode(Arg.Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
166 MVT::i32, Trunc);
167 return Ext;
168}
169
170void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
171 if (VT != PromotedLdStVT) {
173 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
174
176 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
177 }
178
179 MVT ElemTy = VT.getVectorElementType();
180 if (ElemTy != MVT::f64)
184 if (ElemTy == MVT::i32) {
189 } else {
194 }
203 if (VT.isInteger()) {
207 }
208
209 // Neon does not support vector divide/remainder operations.
218
219 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
220 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
222 setOperationAction(Opcode, VT, Legal);
223 if (!VT.isFloatingPoint())
224 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
225 setOperationAction(Opcode, VT, Legal);
226}
227
228void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
229 addRegisterClass(VT, &ARM::DPRRegClass);
230 addTypeForNEON(VT, MVT::f64);
231}
232
233void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
234 addRegisterClass(VT, &ARM::DPairRegClass);
235 addTypeForNEON(VT, MVT::v2f64);
236}
237
238void ARMTargetLowering::setAllExpand(MVT VT) {
239 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
240 setOperationAction(Opc, VT, Expand);
241
242 // We support these really simple operations even on types where all
243 // the actual arithmetic has to be broken down into simpler
244 // operations or turned into library calls.
249}
250
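// Helper that applies the same legalization action to all three extending
// load flavours (any-, zero- and sign-extend) from 'From' to 'To'.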
251void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
252 LegalizeAction Action) {
253 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
254 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
255 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
256}
257
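// Registers the 128-bit MVE vector types (integer, float and the v2i64/v2f64
// "long" types) in the MQPR register class and the i1 predicate vector types
// in VCCR, marking operations Legal, Custom or Expand depending on whether
// the subtarget has MVE.fp.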
258void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
259 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
260
261 for (auto VT : IntTypes) {
262 addRegisterClass(VT, &ARM::MQPRRegClass);
292
293 // No native support for these.
303
304 // Vector reductions
314
315 if (!HasMVEFP) {
320 } else {
323 }
324
325 // Pre and Post inc are supported on loads and stores
326 for (unsigned im = (unsigned)ISD::PRE_INC;
332 }
333 }
334
335 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
336 for (auto VT : FloatTypes) {
337 addRegisterClass(VT, &ARM::MQPRRegClass);
338 if (!HasMVEFP)
339 setAllExpand(VT);
340
341 // These are legal or custom whether we have MVE.fp or not
354
355 // Pre and Post inc are supported on loads and stores
356 for (unsigned im = (unsigned)ISD::PRE_INC;
362 }
363
364 if (HasMVEFP) {
372
373 // No native support for these.
388 }
389 }
390
391 // Custom expand smaller-than-legal vector reductions to prevent false zero
392 // items from being added.
401
402 // We 'support' these types up to bitcast/load/store level, regardless of
403 // MVE integer-only / float support. Only FP data processing on the FP
404 // vector types is inhibited at the integer-only level.
405 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
406 for (auto VT : LongTypes) {
407 addRegisterClass(VT, &ARM::MQPRRegClass);
408 setAllExpand(VT);
414 }
416
417 // We can do bitwise operations on v2i64 vectors
418 setOperationAction(ISD::AND, MVT::v2i64, Legal);
419 setOperationAction(ISD::OR, MVT::v2i64, Legal);
420 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
421
422 // It is legal to extload from v4i8 to v4i16 or v4i32.
423 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
424 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
425 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
426
427 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
433
434 // Some truncating stores are legal too.
435 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
436 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
437 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
438
439 // Pre and Post inc on these are legal, given the correct extends
440 for (unsigned im = (unsigned)ISD::PRE_INC;
442 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
447 }
448 }
449
450 // Predicate types
451 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
452 for (auto VT : pTypes) {
453 addRegisterClass(VT, &ARM::VCCRRegClass);
468
469 if (!HasMVEFP) {
474 }
475 }
479 setOperationAction(ISD::OR, MVT::v2i1, Expand);
485
494}
495
497 const ARMSubtarget &STI)
498 : TargetLowering(TM), Subtarget(&STI) {
499 RegInfo = Subtarget->getRegisterInfo();
500 Itins = Subtarget->getInstrItineraryData();
501
504
505 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
506 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
507 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
508 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
509 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
510 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
512 }
513
514 if (Subtarget->isTargetMachO()) {
515 // Uses VFP for Thumb libfuncs if available.
516 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
517 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
518 static const struct {
519 const RTLIB::Libcall Op;
520 const char * const Name;
521 const ISD::CondCode Cond;
522 } LibraryCalls[] = {
523 // Single-precision floating-point arithmetic.
524 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
525 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
526 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
527 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
528
529 // Double-precision floating-point arithmetic.
530 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
531 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
532 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
533 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
534
535 // Single-precision comparisons.
536 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
537 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
538 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
539 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
540 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
541 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
542 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
543
544 // Double-precision comparisons.
545 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
546 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
547 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
548 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
549 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
550 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
551 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
552
553 // Floating-point to integer conversions.
554 // i64 conversions are done via library routines even when generating VFP
555 // instructions, so use the same ones.
556 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
557 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
558 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
559 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
560
561 // Conversions between floating types.
562 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
563 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
564
565 // Integer to floating-point conversions.
566 // i64 conversions are done via library routines even when generating VFP
567 // instructions, so use the same ones.
568 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
569 // e.g., __floatunsidf vs. __floatunssidfvfp.
570 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
571 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
572 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
573 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
574 };
575
576 for (const auto &LC : LibraryCalls) {
577 setLibcallName(LC.Op, LC.Name);
578 if (LC.Cond != ISD::SETCC_INVALID)
579 setCmpLibcallCC(LC.Op, LC.Cond);
580 }
581 }
582 }
583
584 // RTLIB
585 if (Subtarget->isAAPCS_ABI() &&
586 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
587 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
588 static const struct {
589 const RTLIB::Libcall Op;
590 const char * const Name;
591 const CallingConv::ID CC;
592 const ISD::CondCode Cond;
593 } LibraryCalls[] = {
594 // Double-precision floating-point arithmetic helper functions
595 // RTABI chapter 4.1.2, Table 2
596 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600
601 // Double-precision floating-point comparison helper functions
602 // RTABI chapter 4.1.2, Table 3
603 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
604 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
605 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
606 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
607 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
608 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
609 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
610
611 // Single-precision floating-point arithmetic helper functions
612 // RTABI chapter 4.1.2, Table 4
613 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617
618 // Single-precision floating-point comparison helper functions
619 // RTABI chapter 4.1.2, Table 5
620 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
621 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
622 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
623 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
624 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
625 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
626 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
627
628 // Floating-point to integer conversions.
629 // RTABI chapter 4.1.2, Table 6
630 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638
639 // Conversions between floating types.
640 // RTABI chapter 4.1.2, Table 7
641 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
642 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644
645 // Integer to floating-point conversions.
646 // RTABI chapter 4.1.2, Table 8
647 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655
656 // Long long helper functions
657 // RTABI chapter 4.2, Table 9
658 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662
663 // Integer division functions
664 // RTABI chapter 4.3.1
665 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
670 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
671 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
672 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
673 };
674
675 for (const auto &LC : LibraryCalls) {
676 setLibcallName(LC.Op, LC.Name);
677 setLibcallCallingConv(LC.Op, LC.CC);
678 if (LC.Cond != ISD::SETCC_INVALID)
679 setCmpLibcallCC(LC.Op, LC.Cond);
680 }
681
682 // EABI dependent RTLIB
683 if (TM.Options.EABIVersion == EABI::EABI4 ||
684 TM.Options.EABIVersion == EABI::EABI5) {
685 static const struct {
686 const RTLIB::Libcall Op;
687 const char *const Name;
688 const CallingConv::ID CC;
689 const ISD::CondCode Cond;
690 } MemOpsLibraryCalls[] = {
691 // Memory operations
692 // RTABI chapter 4.3.4
693 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
694 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
695 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
696 };
697
698 for (const auto &LC : MemOpsLibraryCalls) {
699 setLibcallName(LC.Op, LC.Name);
700 setLibcallCallingConv(LC.Op, LC.CC);
701 if (LC.Cond != ISD::SETCC_INVALID)
702 setCmpLibcallCC(LC.Op, LC.Cond);
703 }
704 }
705 }
706
707 if (Subtarget->isTargetWindows()) {
708 static const struct {
709 const RTLIB::Libcall Op;
710 const char * const Name;
711 const CallingConv::ID CC;
712 } LibraryCalls[] = {
713 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
717 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
718 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
719 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
720 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
721 };
722
723 for (const auto &LC : LibraryCalls) {
724 setLibcallName(LC.Op, LC.Name);
725 setLibcallCallingConv(LC.Op, LC.CC);
726 }
727 }
728
729 // Use divmod compiler-rt calls for iOS 5.0 and later.
730 if (Subtarget->isTargetMachO() &&
731 !(Subtarget->isTargetIOS() &&
732 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
733 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
734 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
735 }
736
737 // The half <-> float conversion functions are always soft-float on
738 // non-watchos platforms, but are needed for some targets which use a
739 // hard-float calling convention by default.
740 if (!Subtarget->isTargetWatchABI()) {
741 if (Subtarget->isAAPCS_ABI()) {
742 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
743 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
744 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
745 } else {
746 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
747 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
748 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
749 }
750 }
751
752 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
753 // a __gnu_ prefix (which is the default).
754 if (Subtarget->isTargetAEABI()) {
755 static const struct {
756 const RTLIB::Libcall Op;
757 const char * const Name;
758 const CallingConv::ID CC;
759 } LibraryCalls[] = {
760 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
761 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
762 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
763 };
764
765 for (const auto &LC : LibraryCalls) {
766 setLibcallName(LC.Op, LC.Name);
767 setLibcallCallingConv(LC.Op, LC.CC);
768 }
769 }
770
771 if (Subtarget->isThumb1Only())
772 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
773 else
774 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
775
776 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
777 Subtarget->hasFPRegs()) {
778 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
779 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
780
785
786 if (!Subtarget->hasVFP2Base())
787 setAllExpand(MVT::f32);
788 if (!Subtarget->hasFP64())
789 setAllExpand(MVT::f64);
790 }
791
792 if (Subtarget->hasFullFP16()) {
793 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
796
799 }
800
801 if (Subtarget->hasBF16()) {
802 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
803 setAllExpand(MVT::bf16);
804 if (!Subtarget->hasFullFP16())
806 }
807
809 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
810 setTruncStoreAction(VT, InnerVT, Expand);
811 addAllExtLoads(VT, InnerVT, Expand);
812 }
813
816
818 }
819
822
825
826 if (Subtarget->hasMVEIntegerOps())
827 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
828
829 // Combine low-overhead loop intrinsics so that we can lower i1 types.
830 if (Subtarget->hasLOB()) {
832 }
833
834 if (Subtarget->hasNEON()) {
835 addDRTypeForNEON(MVT::v2f32);
836 addDRTypeForNEON(MVT::v8i8);
837 addDRTypeForNEON(MVT::v4i16);
838 addDRTypeForNEON(MVT::v2i32);
839 addDRTypeForNEON(MVT::v1i64);
840
841 addQRTypeForNEON(MVT::v4f32);
842 addQRTypeForNEON(MVT::v2f64);
843 addQRTypeForNEON(MVT::v16i8);
844 addQRTypeForNEON(MVT::v8i16);
845 addQRTypeForNEON(MVT::v4i32);
846 addQRTypeForNEON(MVT::v2i64);
847
848 if (Subtarget->hasFullFP16()) {
849 addQRTypeForNEON(MVT::v8f16);
850 addDRTypeForNEON(MVT::v4f16);
851 }
852
853 if (Subtarget->hasBF16()) {
854 addQRTypeForNEON(MVT::v8bf16);
855 addDRTypeForNEON(MVT::v4bf16);
856 }
857 }
858
859 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
860 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
861 // none of Neon, MVE or VFP supports any arithmetic operations on it.
862 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
863 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
864 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
865 // FIXME: Code duplication: FDIV and FREM are expanded always, see
866 // ARMTargetLowering::addTypeForNEON method for details.
867 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
868 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
869 // FIXME: Create unittest.
870 // In other words, find a way to handle "copysign" when it appears in the
871 // DAG with vector operands.
873 // FIXME: Code duplication: SETCC has custom operation action, see
874 // ARMTargetLowering::addTypeForNEON method for details.
876 // FIXME: Create unittest for FNEG and for FABS.
877 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
878 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
880 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
881 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
882 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
883 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
884 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
887 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
890 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
896 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
897 }
898
899 if (Subtarget->hasNEON()) {
900 // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
901 // natively supported for v4f32.
903 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
904 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
905 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
906 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
907 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
910 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
918
919 // Mark v2f32 intrinsics.
921 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
922 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
923 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
924 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
925 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
928 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
936
937 // Neon does not support some operations on v1i64 and v2i64 types.
938 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
939 // Custom handling for some quad-vector types to detect VMULL.
940 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
941 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
942 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
943 // Custom handling for some vector types to avoid expensive expansions
944 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
946 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
948 // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
949 // a destination type that is wider than the source, nor does
950 // it have a FP_TO_[SU]INT instruction with a narrower destination than the
951 // source.
960
963
964 // NEON does not have a single-instruction CTPOP for vectors with element
965 // types wider than 8 bits. However, custom lowering can leverage the
966 // v8i8/v16i8 vcnt instruction.
973
974 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
975 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
976
977 // NEON does not have a single-instruction CTTZ for vectors.
979 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
980 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
981 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
982
983 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
984 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
985 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
986 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
987
992
997
1001 }
1002
1003 // NEON only has FMA instructions as of VFP4.
1004 if (!Subtarget->hasVFP4Base()) {
1005 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
1006 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1007 }
1008
1011
1012 // It is legal to extload from v4i8 to v4i16 or v4i32.
1013 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1014 MVT::v2i32}) {
1019 }
1020 }
1021
1022 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1023 MVT::v4i32}) {
1028 }
1029 }
1030
1031 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1038 }
1039 if (Subtarget->hasMVEIntegerOps()) {
1042 ISD::SETCC});
1043 }
1044 if (Subtarget->hasMVEFloatOps()) {
1046 }
1047
1048 if (!Subtarget->hasFP64()) {
1049 // When targeting a floating-point unit with only single-precision
1050 // operations, f64 is legal for the few double-precision instructions which
1051 // are present. However, no double-precision operations other than moves,
1052 // loads and stores are provided by the hardware.
1090 }
1091
1092 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1095 if (Subtarget->hasFullFP16()) {
1098 }
1099 }
1100
1101 if (!Subtarget->hasFP16()) {
1104 }
1105
1107
1108 // ARM does not have floating-point extending loads.
1109 for (MVT VT : MVT::fp_valuetypes()) {
1110 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1111 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1112 }
1113
1114 // ... or truncating stores
1115 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1116 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1117 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1118
1119 // ARM does not have an i1 sign-extending load.
1120 for (MVT VT : MVT::integer_valuetypes())
1121 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1122
1123 // ARM supports all 4 flavors of integer indexed load / store.
1124 if (!Subtarget->isThumb1Only()) {
1125 for (unsigned im = (unsigned)ISD::PRE_INC;
1127 setIndexedLoadAction(im, MVT::i1, Legal);
1128 setIndexedLoadAction(im, MVT::i8, Legal);
1129 setIndexedLoadAction(im, MVT::i16, Legal);
1130 setIndexedLoadAction(im, MVT::i32, Legal);
1131 setIndexedStoreAction(im, MVT::i1, Legal);
1132 setIndexedStoreAction(im, MVT::i8, Legal);
1133 setIndexedStoreAction(im, MVT::i16, Legal);
1134 setIndexedStoreAction(im, MVT::i32, Legal);
1135 }
1136 } else {
1137 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1140 }
1141
1146
1149 if (Subtarget->hasDSP()) {
1158 }
1159 if (Subtarget->hasBaseDSP()) {
1162 }
1163
1164 // i64 operation support.
1167 if (Subtarget->isThumb1Only()) {
1170 }
1171 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1172 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1174
1184
1185 // MVE lowers 64-bit shifts to lsll and lsrl,
1186 // assuming that ISD::SRL and ISD::SRA of i64 are already marked Custom.
1187 if (Subtarget->hasMVEIntegerOps())
1189
1190 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1191 if (Subtarget->isThumb1Only()) {
1195 }
1196
1197 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1199
1200 // ARM does not have ROTL.
1205 }
1208 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1211 }
1212
1213 // @llvm.readcyclecounter requires the Performance Monitors extension.
1214 // Default to the 0 expansion on unsupported platforms.
1215 // FIXME: Technically there are older ARM CPUs that have
1216 // implementation-specific ways of obtaining this information.
1217 if (Subtarget->hasPerfMon())
1219
1220 // Only ARMv6 has BSWAP.
1221 if (!Subtarget->hasV6Ops())
1223
1224 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1225 : Subtarget->hasDivideInARMMode();
1226 if (!hasDivide) {
1227 // These are expanded into libcalls if the CPU doesn't have a hardware divider.
1230 }
1231
1232 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1235
1238 }
1239
1242
1243 // Register based DivRem for AEABI (RTABI 4.2)
1244 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1245 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1246 Subtarget->isTargetWindows()) {
1249 HasStandaloneRem = false;
1250
1251 if (Subtarget->isTargetWindows()) {
1252 const struct {
1253 const RTLIB::Libcall Op;
1254 const char * const Name;
1255 const CallingConv::ID CC;
1256 } LibraryCalls[] = {
1257 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1258 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1259 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1260 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1261
1262 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1263 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1264 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1265 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1266 };
1267
1268 for (const auto &LC : LibraryCalls) {
1269 setLibcallName(LC.Op, LC.Name);
1270 setLibcallCallingConv(LC.Op, LC.CC);
1271 }
1272 } else {
1273 const struct {
1274 const RTLIB::Libcall Op;
1275 const char * const Name;
1276 const CallingConv::ID CC;
1277 } LibraryCalls[] = {
1278 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1279 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1280 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1281 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1282
1283 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1284 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1285 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1286 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1287 };
1288
1289 for (const auto &LC : LibraryCalls) {
1290 setLibcallName(LC.Op, LC.Name);
1291 setLibcallCallingConv(LC.Op, LC.CC);
1292 }
1293 }
1294
1299 } else {
1302 }
1303
1308
1309 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1311
1312 // Use the default implementation.
1314 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1316 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1319
1320 if (Subtarget->isTargetWindows())
1322 else
1324
1325 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1326 // the default expansion.
1327 InsertFencesForAtomic = false;
1328 if (Subtarget->hasAnyDataBarrier() &&
1329 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1330 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1331 // to ldrex/strex loops already.
1333 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1335
1336 // On v8, we have particularly efficient implementations of atomic fences
1337 // if they can be combined with nearby atomic loads and stores.
1338 if (!Subtarget->hasAcquireRelease() ||
1339 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1340 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1341 InsertFencesForAtomic = true;
1342 }
1343 } else {
1344 // If there's anything we can use as a barrier, go through custom lowering
1345 // for ATOMIC_FENCE.
1346 // If the target has DMB in Thumb mode, fences can be inserted.
1347 if (Subtarget->hasDataBarrier())
1348 InsertFencesForAtomic = true;
1349
1351 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1352
1353 // Set them all for libcall, which will force libcalls.
1366 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1367 // Unordered/Monotonic case.
1368 if (!InsertFencesForAtomic) {
1371 }
1372 }
1373
1374 // Compute supported atomic widths.
1375 if (Subtarget->isTargetLinux() ||
1376 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1377 // For targets where __sync_* routines are reliably available, we use them
1378 // if necessary.
1379 //
1380 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1381 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1382 //
1383 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1384 // such targets should provide __sync_* routines, which use the ARM mode
1385 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1386 // encoding; see ARMISD::MEMBARRIER_MCR.)
1388 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1389 Subtarget->hasForced32BitAtomics()) {
1390 // Cortex-M cores (other than Cortex-M0) have 32-bit atomics.
1392 } else {
1393 // We can't assume anything about other targets; just use libatomic
1394 // routines.
1396 }
1397
1399
1401
1402 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1403 if (!Subtarget->hasV6Ops()) {
1406 }
1408
1409 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1410 !Subtarget->isThumb1Only()) {
1411 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1412 // iff target supports vfp2.
1422 }
1423
1424 // We want to custom lower some of our intrinsics.
1429 if (Subtarget->useSjLjEH())
1430 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1431
1441 if (Subtarget->hasFullFP16()) {
1445 }
1446
1448
1451 if (Subtarget->hasFullFP16())
1455 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1456
1457 // We don't support sin/cos/fmod/copysign/pow
1466 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1467 !Subtarget->isThumb1Only()) {
1470 }
1473
1474 if (!Subtarget->hasVFP4Base()) {
1477 }
1478
1479 // Various VFP goodness
1480 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1481 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1482 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1485 }
1486
1487 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1488 if (!Subtarget->hasFP16()) {
1491 }
1492
1493 // Strict floating-point comparisons need custom lowering.
1500 }
1501
1502 // Use __sincos_stret if available.
1503 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1504 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1507 }
1508
1509 // FP-ARMv8 implements a lot of rounding-like FP operations.
1510 if (Subtarget->hasFPARMv8Base()) {
1519 if (Subtarget->hasNEON()) {
1524 }
1525
1526 if (Subtarget->hasFP64()) {
1535 }
1536 }
1537
1538 // FP16 operations often need to be promoted to call library functions.
1539 if (Subtarget->hasFullFP16()) {
1554
1556 }
1557
1558 if (Subtarget->hasNEON()) {
1559 // vmin and vmax aren't available in a scalar form, so we can use
1560 // a NEON instruction with an undef lane instead.
1569
1570 if (Subtarget->hasFullFP16()) {
1575
1580 }
1581 }
1582
1583 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1584 // it, but it's just a wrapper around ldexp.
1585 if (Subtarget->isTargetWindows()) {
1587 if (isOperationExpand(Op, MVT::f32))
1588 setOperationAction(Op, MVT::f32, Promote);
1589 }
1590
1591 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1592 // isn't legal.
1594 if (isOperationExpand(Op, MVT::f16))
1595 setOperationAction(Op, MVT::f16, Promote);
1596
1597 // We have target-specific dag combine patterns for the following nodes:
1598 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1601
1602 if (Subtarget->hasMVEIntegerOps())
1604
1605 if (Subtarget->hasV6Ops())
1607 if (Subtarget->isThumb1Only())
1609 // Attempt to lower smin/smax to ssat/usat
1610 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1611 Subtarget->isThumb2()) {
1613 }
1614
1616
1617 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1618 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1620 else
1622
1623 //// temporary - rewrite interface to use type
1626 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1628 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1630
1631 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1632 // are at least 4 bytes aligned.
1634
1635 // Prefer likely predicted branches to selects on out-of-order cores.
1636 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1637
1638 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1640
1641 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1642}
1643
1645 return Subtarget->useSoftFloat();
1646}
1647
1648// FIXME: It might make sense to define the representative register class as the
1649// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1650// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1651// SPR's representative would be DPR_VFP2. This should work well if register
1652// pressure tracking were modified such that a register use would increment the
1653// pressure of the register class's representative and all of its super
1654// classes' representatives transitively. We have not implemented this because
1655// of the difficulty prior to coalescing of modeling operand register classes
1656// due to the common occurrence of cross-class copies and subregister insertions
1657// and extractions.
1658std::pair<const TargetRegisterClass *, uint8_t>
1660 MVT VT) const {
1661 const TargetRegisterClass *RRC = nullptr;
1662 uint8_t Cost = 1;
1663 switch (VT.SimpleTy) {
1664 default:
1666 // Use DPR as the representative register class for all floating-point
1667 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1668 // the cost is 1 for both f32 and f64.
1669 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1670 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1671 RRC = &ARM::DPRRegClass;
1672 // When NEON is used for SP, only half of the register file is available
1673 // because operations that define both SP and DP results will be constrained
1674 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1675 // coalescing by double-counting the SP regs. See the FIXME above.
1676 if (Subtarget->useNEONForSinglePrecisionFP())
1677 Cost = 2;
1678 break;
1679 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1680 case MVT::v4f32: case MVT::v2f64:
1681 RRC = &ARM::DPRRegClass;
1682 Cost = 2;
1683 break;
1684 case MVT::v4i64:
1685 RRC = &ARM::DPRRegClass;
1686 Cost = 4;
1687 break;
1688 case MVT::v8i64:
1689 RRC = &ARM::DPRRegClass;
1690 Cost = 8;
1691 break;
1692 }
1693 return std::make_pair(RRC, Cost);
1694}
1695
1696const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1697#define MAKE_CASE(V) \
1698 case V: \
1699 return #V;
1700 switch ((ARMISD::NodeType)Opcode) {
1702 break;
1905#undef MAKE_CASE
1906 }
1907 return nullptr;
1908}
1909
1911 EVT VT) const {
1912 if (!VT.isVector())
1913 return getPointerTy(DL);
1914
1915 // MVE has a predicate register.
1916 if ((Subtarget->hasMVEIntegerOps() &&
1917 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1918 VT == MVT::v16i8)) ||
1919 (Subtarget->hasMVEFloatOps() &&
1920 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1921 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1923}
1924
1925/// getRegClassFor - Return the register class that should be used for the
1926/// specified value type.
1927const TargetRegisterClass *
1928ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1929 (void)isDivergent;
1930 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1931 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1932 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1933 // MVE Q registers.
1934 if (Subtarget->hasNEON()) {
1935 if (VT == MVT::v4i64)
1936 return &ARM::QQPRRegClass;
1937 if (VT == MVT::v8i64)
1938 return &ARM::QQQQPRRegClass;
1939 }
1940 if (Subtarget->hasMVEIntegerOps()) {
1941 if (VT == MVT::v4i64)
1942 return &ARM::MQQPRRegClass;
1943 if (VT == MVT::v8i64)
1944 return &ARM::MQQQQPRRegClass;
1945 }
1947}
1948
1949// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1950// source/dest is aligned and the copy size is large enough. We therefore want
1951// to align such objects passed to memory intrinsics.
1953 Align &PrefAlign) const {
1954 if (!isa<MemIntrinsic>(CI))
1955 return false;
1956 MinSize = 8;
1957 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1958 // cycle faster than 4-byte aligned LDM.
1959 PrefAlign =
1960 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1961 return true;
1962}
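// For example, a local object that is only used as the source of an
// llvm.memcpy can then be realigned to 8 bytes, letting the expansion use
// LDM/STM rather than narrower accesses.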
1963
1964// Create a fast isel object.
1965FastISel *
1967 const TargetLibraryInfo *libInfo) const {
1968 return ARM::createFastISel(funcInfo, libInfo);
1969}
1970
1972 unsigned NumVals = N->getNumValues();
1973 if (!NumVals)
1974 return Sched::RegPressure;
1975
1976 for (unsigned i = 0; i != NumVals; ++i) {
1977 EVT VT = N->getValueType(i);
1978 if (VT == MVT::Glue || VT == MVT::Other)
1979 continue;
1980 if (VT.isFloatingPoint() || VT.isVector())
1981 return Sched::ILP;
1982 }
1983
1984 if (!N->isMachineOpcode())
1985 return Sched::RegPressure;
1986
1987 // Loads are scheduled for latency even if the instruction itinerary
1988 // is not available.
1989 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1990 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1991
1992 if (MCID.getNumDefs() == 0)
1993 return Sched::RegPressure;
1994 if (!Itins->isEmpty() &&
1995 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
1996 return Sched::ILP;
1997
1998 return Sched::RegPressure;
1999}
2000
2001//===----------------------------------------------------------------------===//
2002// Lowering Code
2003//===----------------------------------------------------------------------===//
2004
2005static bool isSRL16(const SDValue &Op) {
2006 if (Op.getOpcode() != ISD::SRL)
2007 return false;
2008 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2009 return Const->getZExtValue() == 16;
2010 return false;
2011}
2012
2013static bool isSRA16(const SDValue &Op) {
2014 if (Op.getOpcode() != ISD::SRA)
2015 return false;
2016 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2017 return Const->getZExtValue() == 16;
2018 return false;
2019}
2020
2021static bool isSHL16(const SDValue &Op) {
2022 if (Op.getOpcode() != ISD::SHL)
2023 return false;
2024 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2025 return Const->getZExtValue() == 16;
2026 return false;
2027}
2028
2029// Check for a signed 16-bit value. We special-case SRA because it makes it
2030// simpler when also looking for SRAs that aren't sign-extending a
2031// smaller value. Without the check, we'd need to take extra care with
2032// checking order for some operations.
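// For example, (sra (shl X, 16), 16) is treated as a signed 16-bit value, as
// is any value the DAG proves has exactly 17 known sign bits (a 16-bit value
// sign-extended to 32 bits).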
2033static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2034 if (isSRA16(Op))
2035 return isSHL16(Op.getOperand(0));
2036 return DAG.ComputeNumSignBits(Op) == 17;
2037}
2038
2039/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2041 switch (CC) {
2042 default: llvm_unreachable("Unknown condition code!");
2043 case ISD::SETNE: return ARMCC::NE;
2044 case ISD::SETEQ: return ARMCC::EQ;
2045 case ISD::SETGT: return ARMCC::GT;
2046 case ISD::SETGE: return ARMCC::GE;
2047 case ISD::SETLT: return ARMCC::LT;
2048 case ISD::SETLE: return ARMCC::LE;
2049 case ISD::SETUGT: return ARMCC::HI;
2050 case ISD::SETUGE: return ARMCC::HS;
2051 case ISD::SETULT: return ARMCC::LO;
2052 case ISD::SETULE: return ARMCC::LS;
2053 }
2054}
2055
2056/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
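/// When a single ARM condition cannot capture the comparison (e.g. SETONE or
/// SETUEQ), a second condition is returned in CondCode2 and callers are
/// expected to test both; otherwise CondCode2 is left as ARMCC::AL.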
2058 ARMCC::CondCodes &CondCode2) {
2059 CondCode2 = ARMCC::AL;
2060 switch (CC) {
2061 default: llvm_unreachable("Unknown FP condition!");
2062 case ISD::SETEQ:
2063 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2064 case ISD::SETGT:
2065 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2066 case ISD::SETGE:
2067 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2068 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2069 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2070 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2071 case ISD::SETO: CondCode = ARMCC::VC; break;
2072 case ISD::SETUO: CondCode = ARMCC::VS; break;
2073 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2074 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2075 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2076 case ISD::SETLT:
2077 case ISD::SETULT: CondCode = ARMCC::LT; break;
2078 case ISD::SETLE:
2079 case ISD::SETULE: CondCode = ARMCC::LE; break;
2080 case ISD::SETNE:
2081 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2082 }
2083}
2084
2085//===----------------------------------------------------------------------===//
2086// Calling Convention Implementation
2087//===----------------------------------------------------------------------===//
2088
2089/// getEffectiveCallingConv - Get the effective calling convention, taking into
2090/// account presence of floating point hardware and calling convention
2091/// limitations, such as support for variadic functions.
2093ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2094 bool isVarArg) const {
2095 switch (CC) {
2096 default:
2097 report_fatal_error("Unsupported calling convention");
2100 case CallingConv::GHC:
2102 return CC;
2108 case CallingConv::Swift:
2111 case CallingConv::C:
2112 case CallingConv::Tail:
2113 if (!Subtarget->isAAPCS_ABI())
2114 return CallingConv::ARM_APCS;
2115 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2116 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2117 !isVarArg)
2119 else
2121 case CallingConv::Fast:
2123 if (!Subtarget->isAAPCS_ABI()) {
2124 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2125 return CallingConv::Fast;
2126 return CallingConv::ARM_APCS;
2127 } else if (Subtarget->hasVFP2Base() &&
2128 !Subtarget->isThumb1Only() && !isVarArg)
2130 else
2132 }
2133}
2134
2136 bool isVarArg) const {
2137 return CCAssignFnForNode(CC, false, isVarArg);
2138}
2139
2141 bool isVarArg) const {
2142 return CCAssignFnForNode(CC, true, isVarArg);
2143}
2144
2145/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2146/// CallingConvention.
2147CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2148 bool Return,
2149 bool isVarArg) const {
2150 switch (getEffectiveCallingConv(CC, isVarArg)) {
2151 default:
2152 report_fatal_error("Unsupported calling convention");
2154 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2156 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2158 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2159 case CallingConv::Fast:
2160 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2161 case CallingConv::GHC:
2162 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2164 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2166 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2168 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2169 }
2170}
2171
2172SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2173 MVT LocVT, MVT ValVT, SDValue Val) const {
2174 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2175 Val);
2176 if (Subtarget->hasFullFP16()) {
2177 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2178 } else {
2179 Val = DAG.getNode(ISD::TRUNCATE, dl,
2180 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2181 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2182 }
2183 return Val;
2184}
2185
2186SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2187 MVT LocVT, MVT ValVT,
2188 SDValue Val) const {
2189 if (Subtarget->hasFullFP16()) {
2190 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2191 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2192 } else {
2193 Val = DAG.getNode(ISD::BITCAST, dl,
2194 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2195 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2196 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2197 }
2198 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2199}
2200
2201/// LowerCallResult - Lower the result values of a call into the
2202/// appropriate copies out of appropriate physical registers.
2203SDValue ARMTargetLowering::LowerCallResult(
2204 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2205 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2206 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2207 SDValue ThisVal, bool isCmseNSCall) const {
2208 // Assign locations to each value returned by this call.
2210 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2211 *DAG.getContext());
2212 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2213
2214 // Copy all of the result registers out of their specified physreg.
2215 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2216 CCValAssign VA = RVLocs[i];
2217
2218 // Pass 'this' value directly from the argument to return value, to avoid
2219 // reg unit interference
2220 if (i == 0 && isThisReturn) {
2221 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2222 "unexpected return calling convention register assignment");
2223 InVals.push_back(ThisVal);
2224 continue;
2225 }
2226
2227 SDValue Val;
2228 if (VA.needsCustom() &&
2229 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2230 // Handle f64 or half of a v2f64.
2231 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2232 InGlue);
2233 Chain = Lo.getValue(1);
2234 InGlue = Lo.getValue(2);
2235 VA = RVLocs[++i]; // skip ahead to next loc
2236 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2237 InGlue);
2238 Chain = Hi.getValue(1);
2239 InGlue = Hi.getValue(2);
2240 if (!Subtarget->isLittle())
2241 std::swap (Lo, Hi);
2242 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2243
2244 if (VA.getLocVT() == MVT::v2f64) {
2245 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2246 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2247 DAG.getConstant(0, dl, MVT::i32));
2248
2249 VA = RVLocs[++i]; // skip ahead to next loc
2250 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2251 Chain = Lo.getValue(1);
2252 InGlue = Lo.getValue(2);
2253 VA = RVLocs[++i]; // skip ahead to next loc
2254 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2255 Chain = Hi.getValue(1);
2256 InGlue = Hi.getValue(2);
2257 if (!Subtarget->isLittle())
2258 std::swap (Lo, Hi);
2259 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2260 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2261 DAG.getConstant(1, dl, MVT::i32));
2262 }
2263 } else {
2264 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2265 InGlue);
2266 Chain = Val.getValue(1);
2267 InGlue = Val.getValue(2);
2268 }
2269
2270 switch (VA.getLocInfo()) {
2271 default: llvm_unreachable("Unknown loc info!");
2272 case CCValAssign::Full: break;
2273 case CCValAssign::BCvt:
2274 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2275 break;
2276 }
2277
2278 // f16 arguments have their size extended to 4 bytes and are passed as if
2279 // they had been copied to the LSBs of a 32-bit register.
2280 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2281 if (VA.needsCustom() &&
2282 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2283 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2284
2285 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2286 // is less than 32 bits must be sign- or zero-extended after the call for
2287 // security reasons. Although the ABI mandates an extension done by the
2288 // callee, the latter cannot be trusted to follow the rules of the ABI.
2289 const ISD::InputArg &Arg = Ins[VA.getValNo()];
2290 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
2291 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
2292 Val = handleCMSEValue(Val, Arg, DAG, dl);
2293
2294 InVals.push_back(Val);
2295 }
2296
2297 return Chain;
2298}
2299
2300std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2301 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2302 bool IsTailCall, int SPDiff) const {
2303 SDValue DstAddr;
2304 MachinePointerInfo DstInfo;
2305 int32_t Offset = VA.getLocMemOffset();
2307
2308 if (IsTailCall) {
2309 Offset += SPDiff;
2310 auto PtrVT = getPointerTy(DAG.getDataLayout());
2311 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2312 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2313 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2314 DstInfo =
2316 } else {
2317 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2318 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2319 StackPtr, PtrOff);
2320 DstInfo =
2322 }
2323
2324 return std::make_pair(DstAddr, DstInfo);
2325}
2326
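// Splits an f64 (or one half of a v2f64) argument into two i32 words with
// VMOVRRD and passes them in the locations given by VA and NextVA; if NextVA
// is a memory location the second word is stored to the stack slot computed
// by computeAddrForCallArg. Which word goes first depends on endianness.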
2327void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2328 SDValue Chain, SDValue &Arg,
2329 RegsToPassVector &RegsToPass,
2330 CCValAssign &VA, CCValAssign &NextVA,
2331 SDValue &StackPtr,
2332 SmallVectorImpl<SDValue> &MemOpChains,
2333 bool IsTailCall,
2334 int SPDiff) const {
2335 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2336 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2337 unsigned id = Subtarget->isLittle() ? 0 : 1;
2338 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2339
2340 if (NextVA.isRegLoc())
2341 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2342 else {
2343 assert(NextVA.isMemLoc());
2344 if (!StackPtr.getNode())
2345 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2347
2348 SDValue DstAddr;
2349 MachinePointerInfo DstInfo;
2350 std::tie(DstAddr, DstInfo) =
2351 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2352 MemOpChains.push_back(
2353 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2354 }
2355}
2356
2357static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2358 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2360}
2361
2362/// LowerCall - Lowering a call into a callseq_start <-
 2364/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2364/// nodes.
2365SDValue
2366ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2367 SmallVectorImpl<SDValue> &InVals) const {
2368 SelectionDAG &DAG = CLI.DAG;
2369 SDLoc &dl = CLI.DL;
2371 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2373 SDValue Chain = CLI.Chain;
2374 SDValue Callee = CLI.Callee;
2375 bool &isTailCall = CLI.IsTailCall;
2376 CallingConv::ID CallConv = CLI.CallConv;
2377 bool doesNotRet = CLI.DoesNotReturn;
2378 bool isVarArg = CLI.IsVarArg;
2379
2383 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2384 bool isThisReturn = false;
2385 bool isCmseNSCall = false;
2386 bool isSibCall = false;
2387 bool PreferIndirect = false;
2388 bool GuardWithBTI = false;
2389
2390 // Analyze operands of the call, assigning locations to each operand.
2392 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2393 *DAG.getContext());
2394 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2395
2396 // Lower 'returns_twice' calls to a pseudo-instruction.
2397 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2398 !Subtarget->noBTIAtReturnTwice())
2399 GuardWithBTI = AFI->branchTargetEnforcement();
2400
2401 // Determine whether this is a non-secure function call.
2402 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2403 isCmseNSCall = true;
2404
2405 // Disable tail calls if they're not supported.
2406 if (!Subtarget->supportsTailCall())
2407 isTailCall = false;
2408
2409 // For both the non-secure calls and the returns from a CMSE entry function,
 2410 // the function needs to do some extra work after the call, or before the
 2411 // return, respectively, thus it cannot end with a tail call.
2412 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2413 isTailCall = false;
2414
2415 if (isa<GlobalAddressSDNode>(Callee)) {
2416 // If we're optimizing for minimum size and the function is called three or
2417 // more times in this block, we can improve codesize by calling indirectly
2418 // as BLXr has a 16-bit encoding.
2419 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2420 if (CLI.CB) {
2421 auto *BB = CLI.CB->getParent();
2422 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2423 count_if(GV->users(), [&BB](const User *U) {
2424 return isa<Instruction>(U) &&
2425 cast<Instruction>(U)->getParent() == BB;
2426 }) > 2;
2427 }
2428 }
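  // Rough intuition (sizes are illustrative, Thumb2): each direct "bl foo" is
  // a 32-bit instruction, while "blx rN" has a 16-bit encoding, so once the
  // address of foo has been materialised into a register (a one-off cost,
  // e.g. a literal-pool load or a movw/movt pair), every additional call site
  // saves 2 bytes; with three or more calls in the block the one-off cost is
  // amortised.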
2429 if (isTailCall) {
2430 // Check if it's really possible to do a tail call.
2431 isTailCall =
2432 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2433
2434 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2435 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2436 isSibCall = true;
2437
2438 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2439 // detected sibcalls.
2440 if (isTailCall)
2441 ++NumTailCalls;
2442 }
2443
2444 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2445 report_fatal_error("failed to perform tail call elimination on a call "
2446 "site marked musttail");
2447
2448 // Get a count of how many bytes are to be pushed on the stack.
2449 unsigned NumBytes = CCInfo.getStackSize();
2450
2451 // SPDiff is the byte offset of the call's argument area from the callee's.
2452 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2453 // by this amount for a tail call. In a sibling call it must be 0 because the
2454 // caller will deallocate the entire stack and the callee still expects its
2455 // arguments to begin at SP+0. Completely unused for non-tail calls.
2456 int SPDiff = 0;
2457
2458 if (isTailCall && !isSibCall) {
2459 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2460 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2461
2462 // Since callee will pop argument stack as a tail call, we must keep the
2463 // popped size 16-byte aligned.
2464 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2465 NumBytes = alignTo(NumBytes, StackAlign);
2466
2467 // SPDiff will be negative if this tail call requires more space than we
2468 // would automatically have in our incoming argument space. Positive if we
2469 // can actually shrink the stack.
2470 SPDiff = NumReusableBytes - NumBytes;
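    // Worked example (illustrative): if the caller was entered with 8 bytes of
    // incoming stack arguments (NumReusableBytes == 8) and the tail call needs
    // 16 bytes of outgoing arguments after alignment (NumBytes == 16), then
    // SPDiff == 8 - 16 == -8, and setArgRegsSaveSize(8) below tells frame
    // lowering to reserve the extra 8 bytes so the outgoing arguments fit.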
2471
2472 // If this call requires more stack than we have available from
2473 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2474 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2475 AFI->setArgRegsSaveSize(-SPDiff);
2476 }
2477
2478 if (isSibCall) {
2479 // For sibling tail calls, memory operands are available in our caller's stack.
2480 NumBytes = 0;
2481 } else {
2482 // Adjust the stack pointer for the new arguments...
2483 // These operations are automatically eliminated by the prolog/epilog pass
2484 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2485 }
2486
2488 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2489
2490 RegsToPassVector RegsToPass;
2491 SmallVector<SDValue, 8> MemOpChains;
2492
2493 // During a tail call, stores to the argument area must happen after all of
2494 // the function's incoming arguments have been loaded because they may alias.
2495 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2496 // there's no point in doing so repeatedly so this tracks whether that's
2497 // happened yet.
2498 bool AfterFormalArgLoads = false;
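  // Example of the hazard this avoids (hypothetical): if the caller's incoming
  // argument at [sp, #4] is still needed, but one of the tail call's outgoing
  // arguments is stored into that same fixed stack slot, the load of the
  // incoming value must be scheduled first; the TokenFactor inserted below
  // makes every such store depend on the loads of the formal arguments.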
2499
2500 // Walk the register/memloc assignments, inserting copies/loads. In the case
2501 // of tail call optimization, arguments are handled later.
2502 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2503 i != e;
2504 ++i, ++realArgIdx) {
2505 CCValAssign &VA = ArgLocs[i];
2506 SDValue Arg = OutVals[realArgIdx];
2507 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2508 bool isByVal = Flags.isByVal();
2509
2510 // Promote the value if needed.
2511 switch (VA.getLocInfo()) {
2512 default: llvm_unreachable("Unknown loc info!");
2513 case CCValAssign::Full: break;
2514 case CCValAssign::SExt:
2515 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2516 break;
2517 case CCValAssign::ZExt:
2518 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2519 break;
2520 case CCValAssign::AExt:
2521 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2522 break;
2523 case CCValAssign::BCvt:
2524 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2525 break;
2526 }
2527
2528 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2529 Chain = DAG.getStackArgumentTokenFactor(Chain);
2530 AfterFormalArgLoads = true;
2531 }
2532
2533 // f16 arguments have their size extended to 4 bytes and passed as if they
2534 // had been copied to the LSBs of a 32-bit register.
2535 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2536 if (VA.needsCustom() &&
2537 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2538 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2539 } else {
2540 // f16 arguments could have been extended prior to argument lowering.
 2541 // Mask such arguments if this is a CMSE nonsecure call.
2542 auto ArgVT = Outs[realArgIdx].ArgVT;
2543 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2544 auto LocBits = VA.getLocVT().getSizeInBits();
2545 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2546 SDValue Mask =
2547 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2548 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2549 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2550 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2551 }
2552 }
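    // For instance (illustrative values): an f16 argument equal to 1.0
    // (0x3C00) is passed as the 32-bit pattern 0x00003C00, either in a GPR
    // (soft ABI, as i32) or in an S register (hard ABI, as f32); for a CMSE
    // non-secure call the AND with 0x0000FFFF above ensures no stale data in
    // the upper 16 bits leaks to the non-secure callee.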
2553
2554 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2555 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2556 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2557 DAG.getConstant(0, dl, MVT::i32));
2558 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2559 DAG.getConstant(1, dl, MVT::i32));
2560
2561 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2562 StackPtr, MemOpChains, isTailCall, SPDiff);
2563
2564 VA = ArgLocs[++i]; // skip ahead to next loc
2565 if (VA.isRegLoc()) {
2566 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2567 StackPtr, MemOpChains, isTailCall, SPDiff);
2568 } else {
2569 assert(VA.isMemLoc());
2570 SDValue DstAddr;
2571 MachinePointerInfo DstInfo;
2572 std::tie(DstAddr, DstInfo) =
2573 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2574 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2575 }
2576 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2577 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2578 StackPtr, MemOpChains, isTailCall, SPDiff);
2579 } else if (VA.isRegLoc()) {
2580 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2581 Outs[0].VT == MVT::i32) {
2582 assert(VA.getLocVT() == MVT::i32 &&
2583 "unexpected calling convention register assignment");
2584 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2585 "unexpected use of 'returned'");
2586 isThisReturn = true;
2587 }
2588 const TargetOptions &Options = DAG.getTarget().Options;
2589 if (Options.EmitCallSiteInfo)
2590 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2591 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2592 } else if (isByVal) {
2593 assert(VA.isMemLoc());
2594 unsigned offset = 0;
2595
2596 // True if this byval aggregate will be split between registers
2597 // and memory.
2598 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2599 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2600
2601 if (CurByValIdx < ByValArgsCount) {
2602
2603 unsigned RegBegin, RegEnd;
2604 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2605
2606 EVT PtrVT =
2608 unsigned int i, j;
2609 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2610 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2611 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2612 SDValue Load =
2613 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2614 DAG.InferPtrAlign(AddArg));
2615 MemOpChains.push_back(Load.getValue(1));
2616 RegsToPass.push_back(std::make_pair(j, Load));
2617 }
2618
 2619 // If the parameter size exceeds the register area, the "offset" value
 2620 // helps us calculate the stack slot for the remaining part properly.
2621 offset = RegEnd - RegBegin;
2622
2623 CCInfo.nextInRegsParam();
2624 }
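      // Continuing the example (hypothetical): a 16-byte byval assigned
      // r2..r3 gets its first 8 bytes loaded into r2 and r3 by the loop above,
      // "offset" becomes 2, and the remaining 16 - 4*2 = 8 bytes are copied to
      // the stack by the COPY_STRUCT_BYVAL sequence emitted below.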
2625
2626 if (Flags.getByValSize() > 4*offset) {
2627 auto PtrVT = getPointerTy(DAG.getDataLayout());
2628 SDValue Dst;
2629 MachinePointerInfo DstInfo;
2630 std::tie(Dst, DstInfo) =
2631 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2632 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2633 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2634 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2635 MVT::i32);
2636 SDValue AlignNode =
2637 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2638
2639 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2640 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2641 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2642 Ops));
2643 }
2644 } else {
2645 assert(VA.isMemLoc());
2646 SDValue DstAddr;
2647 MachinePointerInfo DstInfo;
2648 std::tie(DstAddr, DstInfo) =
2649 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2650
2651 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2652 MemOpChains.push_back(Store);
2653 }
2654 }
2655
2656 if (!MemOpChains.empty())
2657 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2658
2659 // Build a sequence of copy-to-reg nodes chained together with token chain
2660 // and flag operands which copy the outgoing args into the appropriate regs.
2661 SDValue InGlue;
2662 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2663 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2664 RegsToPass[i].second, InGlue);
2665 InGlue = Chain.getValue(1);
2666 }
2667
2668 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2669 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2670 // node so that legalize doesn't hack it.
2671 bool isDirect = false;
2672
2674 const GlobalValue *GVal = nullptr;
2675 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2676 GVal = G->getGlobal();
2677 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2678
2679 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2680 bool isLocalARMFunc = false;
2681 auto PtrVt = getPointerTy(DAG.getDataLayout());
2682
2683 if (Subtarget->genLongCalls()) {
2684 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2685 "long-calls codegen is not position independent!");
2686 // Handle a global address or an external symbol. If it's not one of
2687 // those, the target's already in a register, so we don't need to do
2688 // anything extra.
2689 if (isa<GlobalAddressSDNode>(Callee)) {
2690 if (Subtarget->genExecuteOnly()) {
2691 if (Subtarget->useMovt())
2692 ++NumMovwMovt;
2693 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2694 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2695 } else {
2696 // Create a constant pool entry for the callee address
2697 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2699 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2700
2701 // Get the address of the callee into a register
2702 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2703 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2704 Callee = DAG.getLoad(
2705 PtrVt, dl, DAG.getEntryNode(), Addr,
2707 }
2708 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2709 const char *Sym = S->getSymbol();
2710
2711 if (Subtarget->genExecuteOnly()) {
2712 if (Subtarget->useMovt())
2713 ++NumMovwMovt;
2714 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2715 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2716 } else {
2717 // Create a constant pool entry for the callee address
2718 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2720 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2721
2722 // Get the address of the callee into a register
2723 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2724 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2725 Callee = DAG.getLoad(
2726 PtrVt, dl, DAG.getEntryNode(), Addr,
2728 }
2729 }
2730 } else if (isa<GlobalAddressSDNode>(Callee)) {
2731 if (!PreferIndirect) {
2732 isDirect = true;
2733 bool isDef = GVal->isStrongDefinitionForLinker();
2734
2735 // ARM call to a local ARM function is predicable.
2736 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2737 // tBX takes a register source operand.
2738 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2739 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2740 Callee = DAG.getNode(
2741 ARMISD::WrapperPIC, dl, PtrVt,
2742 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2743 Callee = DAG.getLoad(
2744 PtrVt, dl, DAG.getEntryNode(), Callee,
2748 } else if (Subtarget->isTargetCOFF()) {
2749 assert(Subtarget->isTargetWindows() &&
2750 "Windows is the only supported COFF target");
2751 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2752 if (GVal->hasDLLImportStorageClass())
2753 TargetFlags = ARMII::MO_DLLIMPORT;
2754 else if (!TM.shouldAssumeDSOLocal(GVal))
2755 TargetFlags = ARMII::MO_COFFSTUB;
2756 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2757 TargetFlags);
2758 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2759 Callee =
2760 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2761 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2763 } else {
2764 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2765 }
2766 }
2767 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2768 isDirect = true;
2769 // tBX takes a register source operand.
2770 const char *Sym = S->getSymbol();
2771 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2772 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2775 ARMPCLabelIndex, 4);
2776 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2777 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2778 Callee = DAG.getLoad(
2779 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2781 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2782 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2783 } else {
2784 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2785 }
2786 }
2787
2788 if (isCmseNSCall) {
2789 assert(!isARMFunc && !isDirect &&
2790 "Cannot handle call to ARM function or direct call");
2791 if (NumBytes > 0) {
2793 "call to non-secure function would "
2794 "require passing arguments on stack",
2795 dl.getDebugLoc());
2796 DAG.getContext()->diagnose(Diag);
2797 }
2798 if (isStructRet) {
2801 "call to non-secure function would return value through pointer",
2802 dl.getDebugLoc());
2803 DAG.getContext()->diagnose(Diag);
2804 }
2805 }
2806
2807 // FIXME: handle tail calls differently.
2808 unsigned CallOpc;
2809 if (Subtarget->isThumb()) {
2810 if (GuardWithBTI)
2811 CallOpc = ARMISD::t2CALL_BTI;
2812 else if (isCmseNSCall)
2813 CallOpc = ARMISD::tSECALL;
2814 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2815 CallOpc = ARMISD::CALL_NOLINK;
2816 else
2817 CallOpc = ARMISD::CALL;
2818 } else {
2819 if (!isDirect && !Subtarget->hasV5TOps())
2820 CallOpc = ARMISD::CALL_NOLINK;
2821 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2822 // Emit regular call when code size is the priority
2823 !Subtarget->hasMinSize())
2824 // "mov lr, pc; b _foo" to avoid confusing the RSP
2825 CallOpc = ARMISD::CALL_NOLINK;
2826 else
2827 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2828 }
2829
2830 // We don't usually want to end the call-sequence here because we would tidy
2831 // the frame up *after* the call, however in the ABI-changing tail-call case
2832 // we've carefully laid out the parameters so that when sp is reset they'll be
2833 // in the correct location.
2834 if (isTailCall && !isSibCall) {
2835 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2836 InGlue = Chain.getValue(1);
2837 }
2838
2839 std::vector<SDValue> Ops;
2840 Ops.push_back(Chain);
2841 Ops.push_back(Callee);
2842
2843 if (isTailCall) {
2844 Ops.push_back(
2845 DAG.getSignedConstant(SPDiff, dl, MVT::i32, /*isTarget=*/true));
2846 }
2847
2848 // Add argument registers to the end of the list so that they are known live
2849 // into the call.
2850 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2851 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2852 RegsToPass[i].second.getValueType()));
2853
2854 // Add a register mask operand representing the call-preserved registers.
2855 const uint32_t *Mask;
2856 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2857 if (isThisReturn) {
2858 // For 'this' returns, use the R0-preserving mask if applicable
2859 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2860 if (!Mask) {
2861 // Set isThisReturn to false if the calling convention is not one that
2862 // allows 'returned' to be modeled in this way, so LowerCallResult does
2863 // not try to pass 'this' straight through
2864 isThisReturn = false;
2865 Mask = ARI->getCallPreservedMask(MF, CallConv);
2866 }
2867 } else
2868 Mask = ARI->getCallPreservedMask(MF, CallConv);
2869
2870 assert(Mask && "Missing call preserved mask for calling convention");
2871 Ops.push_back(DAG.getRegisterMask(Mask));
2872
2873 if (InGlue.getNode())
2874 Ops.push_back(InGlue);
2875
2876 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2877 if (isTailCall) {
2879 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2880 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2881 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2882 return Ret;
2883 }
2884
2885 // Returns a chain and a flag for retval copy to use.
2886 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2887 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2888 InGlue = Chain.getValue(1);
2889 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2890
2891 // If we're guaranteeing tail-calls will be honoured, the callee must
2892 // pop its own argument stack on return. But this call is *not* a tail call so
2893 // we need to undo that after it returns to restore the status-quo.
2894 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2895 uint64_t CalleePopBytes =
2896 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;
2897
2898 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2899 if (!Ins.empty())
2900 InGlue = Chain.getValue(1);
2901
2902 // Handle result values, copying them out of physregs into vregs that we
2903 // return.
2904 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2905 InVals, isThisReturn,
2906 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
2907}
2908
2909/// HandleByVal - Every parameter *after* a byval parameter is passed
2910/// on the stack. Remember the next parameter register to allocate,
 2911/// and then confiscate the rest of the parameter registers to ensure
2912/// this.
2913void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2914 Align Alignment) const {
2915 // Byval (as with any stack) slots are always at least 4 byte aligned.
2916 Alignment = std::max(Alignment, Align(4));
2917
2918 unsigned Reg = State->AllocateReg(GPRArgRegs);
2919 if (!Reg)
2920 return;
2921
2922 unsigned AlignInRegs = Alignment.value() / 4;
2923 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2924 for (unsigned i = 0; i < Waste; ++i)
2925 Reg = State->AllocateReg(GPRArgRegs);
2926
2927 if (!Reg)
2928 return;
2929
2930 unsigned Excess = 4 * (ARM::R4 - Reg);
2931
 2932 // Special case when NSAA != SP and the parameter size is greater than the
 2933 // size of all remaining GPR regs. In that case we can't split the parameter;
 2934 // we must send it to the stack. We also must set NCRN to R4, wasting all
 2935 // remaining registers.
2936 const unsigned NSAAOffset = State->getStackSize();
2937 if (NSAAOffset != 0 && Size > Excess) {
2938 while (State->AllocateReg(GPRArgRegs))
2939 ;
2940 return;
2941 }
2942
 2943 // The first register for the byval parameter is the first register that
 2944 // wasn't allocated before this method call, so it is "reg".
 2945 // If the parameter is small enough to fit in the range [reg, r4), then
 2946 // the end (one past the last) register is reg + param-size-in-regs;
 2947 // otherwise the parameter is split between registers and the stack, and
 2948 // the end register is r4 in that case.
2949 unsigned ByValRegBegin = Reg;
2950 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2951 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
 2952 // Note: the first register was already allocated at the beginning of this
 2953 // function, so allocate only the remaining registers we need.
2954 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2955 State->AllocateReg(GPRArgRegs);
2956 // A byval parameter that is split between registers and memory needs its
2957 // size truncated here.
2958 // In the case where the entire structure fits in registers, we set the
2959 // size in memory to zero.
2960 Size = std::max<int>(Size - Excess, 0);
2961}
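// Worked example of the allocation above (hypothetical, assuming r0 already
// holds another argument and NSAA == SP): for a 20-byte byval with 8-byte
// alignment, the first AllocateReg returns r1; AlignInRegs == 2, so one
// register (r1) is wasted and the byval starts at r2. Excess == 8, so r2..r3
// hold the first 8 bytes (addInRegsParamInfo(r2, r4)) and Size is reduced to
// the 12 bytes that go on the stack.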
2962
2963/// MatchingStackOffset - Return true if the given stack call argument is
2964/// already available in the same position (relatively) of the caller's
2965/// incoming argument stack.
2966static
2969 const TargetInstrInfo *TII) {
2970 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2971 int FI = std::numeric_limits<int>::max();
2972 if (Arg.getOpcode() == ISD::CopyFromReg) {
2973 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2974 if (!VR.isVirtual())
2975 return false;
2976 MachineInstr *Def = MRI->getVRegDef(VR);
2977 if (!Def)
2978 return false;
2979 if (!Flags.isByVal()) {
2980 if (!TII->isLoadFromStackSlot(*Def, FI))
2981 return false;
2982 } else {
2983 return false;
2984 }
2985 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2986 if (Flags.isByVal())
2987 // ByVal argument is passed in as a pointer but it's now being
2988 // dereferenced. e.g.
2989 // define @foo(%struct.X* %A) {
2990 // tail call @bar(%struct.X* byval %A)
2991 // }
2992 return false;
2993 SDValue Ptr = Ld->getBasePtr();
2994 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2995 if (!FINode)
2996 return false;
2997 FI = FINode->getIndex();
2998 } else
2999 return false;
3000
3001 assert(FI != std::numeric_limits<int>::max());
3002 if (!MFI.isFixedObjectIndex(FI))
3003 return false;
3004 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
3005}
3006
3007/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3008/// for tail call optimization. Targets which want to do tail call
3009/// optimization should implement this function. Note that this function also
3010/// processes musttail calls, so when this function returns false on a valid
3011/// musttail call, a fatal backend error occurs.
3012bool ARMTargetLowering::IsEligibleForTailCallOptimization(
3014 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
3015 CallingConv::ID CalleeCC = CLI.CallConv;
3016 SDValue Callee = CLI.Callee;
3017 bool isVarArg = CLI.IsVarArg;
3018 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3019 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3021 const SelectionDAG &DAG = CLI.DAG;
3023 const Function &CallerF = MF.getFunction();
3024 CallingConv::ID CallerCC = CallerF.getCallingConv();
3025
3026 assert(Subtarget->supportsTailCall());
3027
3028 // Indirect tail-calls require a register to hold the target address. That
3029 // register must be:
3030 // * Allocatable (i.e. r0-r7 if the target is Thumb1).
3031 // * Not callee-saved, so must be one of r0-r3 or r12.
3032 // * Not used to hold an argument to the tail-called function, which might be
3033 // in r0-r3.
3034 // * Not used to hold the return address authentication code, which is in r12
3035 // if enabled.
3036 // Sometimes, no register matches all of these conditions, so we can't do a
3037 // tail-call.
3038 if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
3039 SmallSet<MCPhysReg, 5> AddressRegisters;
3040 for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
3041 AddressRegisters.insert(R);
3042 if (!(Subtarget->isThumb1Only() ||
3044 AddressRegisters.insert(ARM::R12);
3045 for (const CCValAssign &AL : ArgLocs)
3046 if (AL.isRegLoc())
3047 AddressRegisters.erase(AL.getLocReg());
3048 if (AddressRegisters.empty())
3049 return false;
3050 }
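  // Example (illustrative): an indirect tail call that passes four i32
  // arguments occupies r0-r3, so the only possible address register is r12;
  // if r12 is unavailable (e.g. Thumb1, or it holds the return address
  // authentication code), AddressRegisters ends up empty and the tail call is
  // rejected.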
3051
3052 // Look for obvious safe cases to perform tail call optimization that do not
3053 // require ABI changes. This is what gcc calls sibcall.
3054
3055 // Exception-handling functions need a special set of instructions to indicate
3056 // a return to the hardware. Tail-calling another function would probably
3057 // break this.
3058 if (CallerF.hasFnAttribute("interrupt"))
3059 return false;
3060
3061 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3062 return CalleeCC == CallerCC;
3063
3064 // Also avoid sibcall optimization if either caller or callee uses struct
3065 // return semantics.
3066 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3067 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3068 if (isCalleeStructRet || isCallerStructRet)
3069 return false;
3070
3071 // Externally-defined functions with weak linkage should not be
3072 // tail-called on ARM when the OS does not support dynamic
3073 // pre-emption of symbols, as the AAELF spec requires normal calls
3074 // to undefined weak functions to be replaced with a NOP or jump to the
3075 // next instruction. The behaviour of branch instructions in this
3076 // situation (as used for tail calls) is implementation-defined, so we
3077 // cannot rely on the linker replacing the tail call with a return.
3078 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3079 const GlobalValue *GV = G->getGlobal();
3081 if (GV->hasExternalWeakLinkage() &&
3082 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3083 return false;
3084 }
3085
3086 // Check that the call results are passed in the same way.
3087 LLVMContext &C = *DAG.getContext();
3089 getEffectiveCallingConv(CalleeCC, isVarArg),
3090 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3091 CCAssignFnForReturn(CalleeCC, isVarArg),
3092 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3093 return false;
3094 // The callee has to preserve all registers the caller needs to preserve.
3095 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3096 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3097 if (CalleeCC != CallerCC) {
3098 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3099 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3100 return false;
3101 }
3102
3103 // If Caller's vararg or byval argument has been split between registers and
3104 // stack, do not perform tail call, since part of the argument is in caller's
3105 // local frame.
3106 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3107 if (AFI_Caller->getArgRegsSaveSize())
3108 return false;
3109
3110 // If the callee takes no arguments then go on to check the results of the
3111 // call.
3112 if (!Outs.empty()) {
3113 if (CCInfo.getStackSize()) {
3114 // Check if the arguments are already laid out in the right way as
3115 // the caller's fixed stack objects.
3116 MachineFrameInfo &MFI = MF.getFrameInfo();
3117 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3118 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3119 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3120 i != e;
3121 ++i, ++realArgIdx) {
3122 CCValAssign &VA = ArgLocs[i];
3123 EVT RegVT = VA.getLocVT();
3124 SDValue Arg = OutVals[realArgIdx];
3125 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3127 return false;
3128 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3129 // f64 and vector types are split into multiple registers or
3130 // register/stack-slot combinations. The types will not match
3131 // the registers; give up on memory f64 refs until we figure
3132 // out what to do about this.
3133 if (!VA.isRegLoc())
3134 return false;
3135 if (!ArgLocs[++i].isRegLoc())
3136 return false;
3137 if (RegVT == MVT::v2f64) {
3138 if (!ArgLocs[++i].isRegLoc())
3139 return false;
3140 if (!ArgLocs[++i].isRegLoc())
3141 return false;
3142 }
3143 } else if (!VA.isRegLoc()) {
3145 MFI, MRI, TII))
3146 return false;
3147 }
3148 }
3149 }
3150
3151 const MachineRegisterInfo &MRI = MF.getRegInfo();
3152 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3153 return false;
3154 }
3155
3156 return true;
3157}
3158
3159bool
3160ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3161 MachineFunction &MF, bool isVarArg,
3163 LLVMContext &Context) const {
3165 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3166 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3167}
3168
3170 const SDLoc &DL, SelectionDAG &DAG) {
3171 const MachineFunction &MF = DAG.getMachineFunction();
3172 const Function &F = MF.getFunction();
3173
3174 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3175
3176 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3177 // version of the "preferred return address". These offsets affect the return
3178 // instruction if this is a return from PL1 without hypervisor extensions.
3179 // IRQ/FIQ: +4 "subs pc, lr, #4"
3180 // SWI: 0 "subs pc, lr, #0"
3181 // ABORT: +4 "subs pc, lr, #4"
3182 // UNDEF: +4/+2 "subs pc, lr, #0"
 3183 // UNDEF varies depending on whether the exception came from ARM or Thumb
 3184 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3185
3186 int64_t LROffset;
3187 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3188 IntKind == "ABORT")
3189 LROffset = 4;
3190 else if (IntKind == "SWI" || IntKind == "UNDEF")
3191 LROffset = 0;
3192 else
3193 report_fatal_error("Unsupported interrupt attribute. If present, value "
3194 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3195
3196 RetOps.insert(RetOps.begin() + 1,
3197 DAG.getConstant(LROffset, DL, MVT::i32, false));
3198
3199 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3200}
3201
3202SDValue
3203ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3204 bool isVarArg,
3206 const SmallVectorImpl<SDValue> &OutVals,
3207 const SDLoc &dl, SelectionDAG &DAG) const {
3208 // CCValAssign - represent the assignment of the return value to a location.
3210
3211 // CCState - Info about the registers and stack slots.
3212 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3213 *DAG.getContext());
3214
3215 // Analyze outgoing return values.
3216 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3217
3218 SDValue Glue;
3220 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3221 bool isLittleEndian = Subtarget->isLittle();
3222
3225 AFI->setReturnRegsCount(RVLocs.size());
3226
3227 // Report error if cmse entry function returns structure through first ptr arg.
3228 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3229 // Note: using an empty SDLoc(), as the first line of the function is a
3230 // better place to report than the last line.
3233 "secure entry function would return value through pointer",
3234 SDLoc().getDebugLoc());
3235 DAG.getContext()->diagnose(Diag);
3236 }
3237
3238 // Copy the result values into the output registers.
3239 for (unsigned i = 0, realRVLocIdx = 0;
3240 i != RVLocs.size();
3241 ++i, ++realRVLocIdx) {
3242 CCValAssign &VA = RVLocs[i];
3243 assert(VA.isRegLoc() && "Can only return in registers!");
3244
3245 SDValue Arg = OutVals[realRVLocIdx];
3246 bool ReturnF16 = false;
3247
3248 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3249 // Half-precision return values can be returned like this:
3250 //
3251 // t11 f16 = fadd ...
3252 // t12: i16 = bitcast t11
3253 // t13: i32 = zero_extend t12
3254 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3255 //
3256 // to avoid code generation for bitcasts, we simply set Arg to the node
3257 // that produces the f16 value, t11 in this case.
3258 //
3259 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3260 SDValue ZE = Arg.getOperand(0);
3261 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3262 SDValue BC = ZE.getOperand(0);
3263 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3264 Arg = BC.getOperand(0);
3265 ReturnF16 = true;
3266 }
3267 }
3268 }
3269 }
3270
3271 switch (VA.getLocInfo()) {
3272 default: llvm_unreachable("Unknown loc info!");
3273 case CCValAssign::Full: break;
3274 case CCValAssign::BCvt:
3275 if (!ReturnF16)
3276 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3277 break;
3278 }
3279
3280 // Mask f16 arguments if this is a CMSE nonsecure entry.
3281 auto RetVT = Outs[realRVLocIdx].ArgVT;
3282 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3283 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3284 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3285 } else {
3286 auto LocBits = VA.getLocVT().getSizeInBits();
3287 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3288 SDValue Mask =
3289 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3290 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3291 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3292 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3293 }
3294 }
3295
3296 if (VA.needsCustom() &&
3297 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3298 if (VA.getLocVT() == MVT::v2f64) {
3299 // Extract the first half and return it in two registers.
3300 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3301 DAG.getConstant(0, dl, MVT::i32));
3302 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3303 DAG.getVTList(MVT::i32, MVT::i32), Half);
3304
3305 Chain =
3306 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3307 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3308 Glue = Chain.getValue(1);
3309 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3310 VA = RVLocs[++i]; // skip ahead to next loc
3311 Chain =
3312 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3313 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3314 Glue = Chain.getValue(1);
3315 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3316 VA = RVLocs[++i]; // skip ahead to next loc
3317
3318 // Extract the 2nd half and fall through to handle it as an f64 value.
3319 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3320 DAG.getConstant(1, dl, MVT::i32));
3321 }
3322 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3323 // available.
3324 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3325 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3326 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3327 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3328 Glue = Chain.getValue(1);
3329 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3330 VA = RVLocs[++i]; // skip ahead to next loc
3331 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3332 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3333 } else
3334 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3335
3336 // Guarantee that all emitted copies are
3337 // stuck together, avoiding something bad.
3338 Glue = Chain.getValue(1);
3339 RetOps.push_back(DAG.getRegister(
3340 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3341 }
3342 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3343 const MCPhysReg *I =
3344 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3345 if (I) {
3346 for (; *I; ++I) {
3347 if (ARM::GPRRegClass.contains(*I))
3348 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3349 else if (ARM::DPRRegClass.contains(*I))
3351 else
3352 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3353 }
3354 }
3355
3356 // Update chain and glue.
3357 RetOps[0] = Chain;
3358 if (Glue.getNode())
3359 RetOps.push_back(Glue);
3360
3361 // CPUs which aren't M-class use a special sequence to return from
3362 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3363 // though we use "subs pc, lr, #N").
3364 //
3365 // M-class CPUs actually use a normal return sequence with a special
3366 // (hardware-provided) value in LR, so the normal code path works.
3367 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3368 !Subtarget->isMClass()) {
3369 if (Subtarget->isThumb1Only())
3370 report_fatal_error("interrupt attribute is not supported in Thumb1");
3371 return LowerInterruptReturn(RetOps, dl, DAG);
3372 }
3373
3376 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3377}
3378
3379bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3380 if (N->getNumValues() != 1)
3381 return false;
3382 if (!N->hasNUsesOfValue(1, 0))
3383 return false;
3384
3385 SDValue TCChain = Chain;
3386 SDNode *Copy = *N->use_begin();
3387 if (Copy->getOpcode() == ISD::CopyToReg) {
3388 // If the copy has a glue operand, we conservatively assume it isn't safe to
3389 // perform a tail call.
3390 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3391 return false;
3392 TCChain = Copy->getOperand(0);
3393 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3394 SDNode *VMov = Copy;
3395 // f64 returned in a pair of GPRs.
3397 for (SDNode *U : VMov->uses()) {
3398 if (U->getOpcode() != ISD::CopyToReg)
3399 return false;
3400 Copies.insert(U);
3401 }
3402 if (Copies.size() > 2)
3403 return false;
3404
3405 for (SDNode *U : VMov->uses()) {
3406 SDValue UseChain = U->getOperand(0);
3407 if (Copies.count(UseChain.getNode()))
3408 // Second CopyToReg
3409 Copy = U;
3410 else {
3411 // We are at the top of this chain.
3412 // If the copy has a glue operand, we conservatively assume it
3413 // isn't safe to perform a tail call.
3414 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3415 return false;
3416 // First CopyToReg
3417 TCChain = UseChain;
3418 }
3419 }
3420 } else if (Copy->getOpcode() == ISD::BITCAST) {
3421 // f32 returned in a single GPR.
3422 if (!Copy->hasOneUse())
3423 return false;
3424 Copy = *Copy->use_begin();
3425 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3426 return false;
3427 // If the copy has a glue operand, we conservatively assume it isn't safe to
3428 // perform a tail call.
3429 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3430 return false;
3431 TCChain = Copy->getOperand(0);
3432 } else {
3433 return false;
3434 }
3435
3436 bool HasRet = false;
3437 for (const SDNode *U : Copy->uses()) {
3438 if (U->getOpcode() != ARMISD::RET_GLUE &&
3439 U->getOpcode() != ARMISD::INTRET_GLUE)
3440 return false;
3441 HasRet = true;
3442 }
3443
3444 if (!HasRet)
3445 return false;
3446
3447 Chain = TCChain;
3448 return true;
3449}
3450
3451bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3452 if (!Subtarget->supportsTailCall())
3453 return false;
3454
3455 if (!CI->isTailCall())
3456 return false;
3457
3458 return true;
3459}
3460
 3461 // Trying to write a 64-bit value, so we need to split it into two 32-bit
 3462 // values first, and pass the low and high parts through.
3464 SDLoc DL(Op);
3465 SDValue WriteValue = Op->getOperand(2);
3466
3467 // This function is only supposed to be called for i64 type argument.
3468 assert(WriteValue.getValueType() == MVT::i64
3469 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3470
3471 SDValue Lo, Hi;
3472 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3473 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3474 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3475}
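// For example (sketch, register name elided): an IR call
//   call void @llvm.write_register.i64(metadata !"<reg>", i64 %v)
// reaches this point with %v as an i64; SplitScalar produces the two i32
// halves, which become the Lo/Hi operands of the rebuilt WRITE_REGISTER node.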
3476
3477// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3478// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3479// one of the above mentioned nodes. It has to be wrapped because otherwise
3480// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3481// be used to form addressing mode. These wrapped nodes will be selected
3482// into MOVi.
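// In DAG form the lowering below produces roughly:
//   t1: i32 = TargetConstantPool<...>
//   t2: i32 = ARMISD::Wrapper t1
// and it is the wrapped node t2, not the raw TargetConstantPool, that the
// addressing-mode patterns and the final MOVi / constant-pool-load selection
// operate on.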
3483SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3484 SelectionDAG &DAG) const {
3485 EVT PtrVT = Op.getValueType();
3486 // FIXME there is no actual debug info here
3487 SDLoc dl(Op);
3488 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3489 SDValue Res;
3490
3491 // When generating execute-only code Constant Pools must be promoted to the
3492 // global data section. It's a bit ugly that we can't share them across basic
 3493 // blocks, but this way we guarantee that execute-only behaves correctly with
3494 // position-independent addressing modes.
3495 if (Subtarget->genExecuteOnly()) {
3496 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3497 auto T = const_cast<Type*>(CP->getType());
3498 auto C = const_cast<Constant*>(CP->getConstVal());
3499 auto M = const_cast<Module*>(DAG.getMachineFunction().
3501 auto GV = new GlobalVariable(
3502 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3505 Twine(AFI->createPICLabelUId())
3506 );
3507 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3508 dl, PtrVT);
3509 return LowerGlobalAddress(GA, DAG);
3510 }
3511
3512 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3513 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3514 Align CPAlign = CP->getAlign();
3515 if (Subtarget->isThumb1Only())
3516 CPAlign = std::max(CPAlign, Align(4));
3517 if (CP->isMachineConstantPoolEntry())
3518 Res =
3519 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3520 else
3521 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3522 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3523}
3524
3526 // If we don't have a 32-bit pc-relative branch instruction then the jump
3527 // table consists of block addresses. Usually this is inline, but for
3528 // execute-only it must be placed out-of-line.
3529 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3532}
3533
3534SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3535 SelectionDAG &DAG) const {
3538 unsigned ARMPCLabelIndex = 0;
3539 SDLoc DL(Op);
3540 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3541 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3542 SDValue CPAddr;
3543 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3544 if (!IsPositionIndependent) {
3545 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3546 } else {
3547 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3548 ARMPCLabelIndex = AFI->createPICLabelUId();
3550 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3551 ARMCP::CPBlockAddress, PCAdj);
3552 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3553 }
3554 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3555 SDValue Result = DAG.getLoad(
3556 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3558 if (!IsPositionIndependent)
3559 return Result;
3560 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3561 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3562}
3563
3564/// Convert a TLS address reference into the correct sequence of loads
3565/// and calls to compute the variable's address for Darwin, and return an
3566/// SDValue containing the final node.
3567
3568/// Darwin only has one TLS scheme which must be capable of dealing with the
3569/// fully general situation, in the worst case. This means:
3570/// + "extern __thread" declaration.
3571/// + Defined in a possibly unknown dynamic library.
3572///
3573/// The general system is that each __thread variable has a [3 x i32] descriptor
3574/// which contains information used by the runtime to calculate the address. The
3575/// only part of this the compiler needs to know about is the first word, which
3576/// contains a function pointer that must be called with the address of the
3577/// entire descriptor in "r0".
3578///
3579/// Since this descriptor may be in a different unit, in general access must
3580/// proceed along the usual ARM rules. A common sequence to produce is:
3581///
3582/// movw rT1, :lower16:_var$non_lazy_ptr
3583/// movt rT1, :upper16:_var$non_lazy_ptr
3584/// ldr r0, [rT1]
3585/// ldr rT2, [r0]
3586/// blx rT2
3587/// [...address now in r0...]
3588SDValue
3589ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3590 SelectionDAG &DAG) const {
3591 assert(Subtarget->isTargetDarwin() &&
3592 "This function expects a Darwin target");
3593 SDLoc DL(Op);
3594
 3595 // First step is to get the address of the actual global symbol. This is where
3596 // the TLS descriptor lives.
3597 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3598
3599 // The first entry in the descriptor is a function pointer that we must call
3600 // to obtain the address of the variable.
3601 SDValue Chain = DAG.getEntryNode();
3602 SDValue FuncTLVGet = DAG.getLoad(
3603 MVT::i32, DL, Chain, DescAddr,
3607 Chain = FuncTLVGet.getValue(1);
3608
3610 MachineFrameInfo &MFI = F.getFrameInfo();
3611 MFI.setAdjustsStack(true);
3612
3613 // TLS calls preserve all registers except those that absolutely must be
3614 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3615 // silly).
3616 auto TRI =
3618 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3620
3621 // Finally, we can make the call. This is just a degenerate version of a
 3622 // normal ARM call node: r0 takes the address of the descriptor, and
3623 // returns the address of the variable in this thread.
3624 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3625 Chain =
3626 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3627 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3628 DAG.getRegisterMask(Mask), Chain.getValue(1));
3629 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3630}
3631
3632SDValue
3633ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3634 SelectionDAG &DAG) const {
3635 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3636
3637 SDValue Chain = DAG.getEntryNode();
3638 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3639 SDLoc DL(Op);
3640
3641 // Load the current TEB (thread environment block)
3642 SDValue Ops[] = {Chain,
3643 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3644 DAG.getTargetConstant(15, DL, MVT::i32),
3645 DAG.getTargetConstant(0, DL, MVT::i32),
3646 DAG.getTargetConstant(13, DL, MVT::i32),
3647 DAG.getTargetConstant(0, DL, MVT::i32),
3648 DAG.getTargetConstant(2, DL, MVT::i32)};
3649 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3650 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3651
3652 SDValue TEB = CurrentTEB.getValue(0);
3653 Chain = CurrentTEB.getValue(1);
3654
3655 // Load the ThreadLocalStoragePointer from the TEB
3656 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3657 SDValue TLSArray =
3658 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3659 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3660
 3661 // The pointer to the thread's TLS data area is found at an offset of the
 3662 // TLS index scaled by 4 into the TLSArray (i.e. TLSArray + TLSIndex * 4).
3663
3664 // Load the TLS index from the C runtime
3665 SDValue TLSIndex =
3666 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3667 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3668 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3669
3670 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3671 DAG.getConstant(2, DL, MVT::i32));
3672 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3673 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3675
3676 // Get the offset of the start of the .tls section (section base)
3677 const auto *GA = cast<GlobalAddressSDNode>(Op);
3678 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3679 SDValue Offset = DAG.getLoad(
3680 PtrVT, DL, Chain,
3681 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3682 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3684
3685 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3686}
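// The sequence built above corresponds roughly to (register names are
// illustrative):
//   mrc  p15, #0, rT, c13, c0, #2   @ TEB (CP15 thread ID register)
//   ldr  rA, [rT, #0x2c]            @ ThreadLocalStoragePointer
//   movw rI, :lower16:_tls_index    @ address of the module's TLS index
//   movt rI, :upper16:_tls_index
//   ldr  rI, [rI]
//   ldr  rD, [rA, rI, lsl #2]       @ this module's TLS block
//   ldr  rO, <cpool: @var(secrel32)> @ offset of the variable in the block
//   add  r0, rD, rO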
3687
3688// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3689SDValue
3690ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3691 SelectionDAG &DAG) const {
3692 SDLoc dl(GA);
3693 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3694 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3697 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3699 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3700 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3701 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3702 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3703 Argument = DAG.getLoad(
3704 PtrVT, dl, DAG.getEntryNode(), Argument,
3706 SDValue Chain = Argument.getValue(1);
3707
3708 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3709 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3710
3711 // call __tls_get_addr.
3713 ArgListEntry Entry;
3714 Entry.Node = Argument;
3715 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3716 Args.push_back(Entry);
3717
3718 // FIXME: is there useful debug info available here?
3720 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3722 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3723
3724 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3725 return CallResult.first;
3726}
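// The net effect is the classic general-dynamic sequence (sketch, ARM mode):
//   ldr  r0, <cpool: TLSGD descriptor offset>
//   add  r0, pc, r0                 @ the ARMISD::PIC_ADD above
//   bl   __tls_get_addr             @ returns the variable's address in r0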
3727
3728// Lower ISD::GlobalTLSAddress using the "initial exec" or
3729// "local exec" model.
3730SDValue
3731ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3732 SelectionDAG &DAG,
3733 TLSModel::Model model) const {
3734 const GlobalValue *GV = GA->getGlobal();
3735 SDLoc dl(GA);
3737 SDValue Chain = DAG.getEntryNode();
3738 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3739 // Get the Thread Pointer
3741
3742 if (model == TLSModel::InitialExec) {
3745 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3746 // Initial exec model.
3747 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3749 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3751 true);
3752 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3753 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3754 Offset = DAG.getLoad(
3755 PtrVT, dl, Chain, Offset,
3757 Chain = Offset.getValue(1);
3758
3759 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3760 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3761
3762 Offset = DAG.getLoad(
3763 PtrVT, dl, Chain, Offset,
3765 } else {
3766 // local exec model
3767 assert(model == TLSModel::LocalExec);
3770 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3771 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3772 Offset = DAG.getLoad(
3773 PtrVT, dl, Chain, Offset,
3775 }
3776
3777 // The address of the thread local variable is the add of the thread
3778 // pointer with the offset of the variable.
3779 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3780}
3781
3782SDValue
3783ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3784 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3785 if (DAG.getTarget().useEmulatedTLS())
3786 return LowerToTLSEmulatedModel(GA, DAG);
3787
3788 if (Subtarget->isTargetDarwin())
3789 return LowerGlobalTLSAddressDarwin(Op, DAG);
3790
3791 if (Subtarget->isTargetWindows())
3792 return LowerGlobalTLSAddressWindows(Op, DAG);
3793
3794 // TODO: implement the "local dynamic" model
3795 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3797
3798 switch (model) {
3801 return LowerToTLSGeneralDynamicModel(GA, DAG);
3804 return LowerToTLSExecModels(GA, DAG, model);
3805 }
3806 llvm_unreachable("bogus TLS model");
3807}
3808
3809/// Return true if all users of V are within function F, looking through
3810/// ConstantExprs.
3811static bool allUsersAreInFunction(const Value *V, const Function *F) {
3812 SmallVector<const User*,4> Worklist(V->users());
3813 while (!Worklist.empty()) {
3814 auto *U = Worklist.pop_back_val();
3815 if (isa<ConstantExpr>(U)) {
3816 append_range(Worklist, U->users());
3817 continue;
3818 }
3819
3820 auto *I = dyn_cast<Instruction>(U);
3821 if (!I || I->getParent()->getParent() != F)
3822 return false;
3823 }
3824 return true;
3825}
3826
3828 const GlobalValue *GV, SelectionDAG &DAG,
3829 EVT PtrVT, const SDLoc &dl) {
3830 // If we're creating a pool entry for a constant global with unnamed address,
3831 // and the global is small enough, we can emit it inline into the constant pool
3832 // to save ourselves an indirection.
3833 //
3834 // This is a win if the constant is only used in one function (so it doesn't
3835 // need to be duplicated) or duplicating the constant wouldn't increase code
3836 // size (implying the constant is no larger than 4 bytes).
3837 const Function &F = DAG.getMachineFunction().getFunction();
3838
 3839 // We rely on this decision to inline being idempotent and unrelated to the
3840 // use-site. We know that if we inline a variable at one use site, we'll
3841 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
 3842 // doesn't know about this optimization, so bail out if it's enabled; else
 3843 // we could decide to inline here (and thus never emit the GV) while
 3844 // fast-isel generated code still requires the GV.
3847 return SDValue();
3848
3849 auto *GVar = dyn_cast<GlobalVariable>(GV);
3850 if (!GVar || !GVar->hasInitializer() ||
3851 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3852 !GVar->hasLocalLinkage())
3853 return SDValue();
3854
3855 // If we inline a value that contains relocations, we move the relocations
3856 // from .data to .text. This is not allowed in position-independent code.
3857 auto *Init = GVar->getInitializer();
3858 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3859 Init->needsDynamicRelocation())
3860 return SDValue();
3861
3862 // The constant islands pass can only really deal with alignment requests
3863 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3864 // any type wanting greater alignment requirements than 4 bytes. We also
3865 // can only promote constants that are multiples of 4 bytes in size or
3866 // are paddable to a multiple of 4. Currently we only try to pad constants
3867 // that are strings for simplicity.
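// For example, a 6-byte string initializer gets RequiredPadding == 2 and is
// emitted as 8 bytes with two trailing zeros, keeping the pool entry a
// multiple of 4 bytes.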
3868 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3869 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3870 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3871 unsigned RequiredPadding = 4 - (Size % 4);
3872 bool PaddingPossible =
3873 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3874 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3875 Size == 0)
3876 return SDValue();
3877
3878 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3881
3882 // We can't bloat the constant pool too much, else the ConstantIslands pass
3883 // may fail to converge. If we haven't promoted this global yet (it may have
3884 // multiple uses), and promoting it would increase the constant pool size (Sz
3885 // > 4), ensure we have space to do so up to MaxTotal.
3886 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3887 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3889 return SDValue();
3890
3891 // This is only valid if all users are in a single function; we can't clone
3892 // the constant in general. The LLVM IR unnamed_addr allows merging
3893 // constants, but not cloning them.
3894 //
3895 // We could potentially allow cloning if we could prove all uses of the
3896 // constant in the current function don't care about the address, like
3897 // printf format strings. But that isn't implemented for now.
3898 if (!allUsersAreInFunction(GVar, &F))
3899 return SDValue();
3900
3901 // We're going to inline this global. Pad it out if needed.
3902 if (RequiredPadding != 4) {
3903 StringRef S = CDAInit->getAsString();
3904
3906 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3907 while (RequiredPadding--)
3908 V.push_back(0);
3910 }
3911
3912 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3913 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3914 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3917 PaddedSize - 4);
3918 }
3919 ++NumConstpoolPromoted;
3920 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3921}
3922
3924 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3925 if (!(GV = GA->getAliaseeObject()))
3926 return false;
3927 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3928 return V->isConstant();
3929 return isa<Function>(GV);
3930}
3931
3932SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3933 SelectionDAG &DAG) const {
3934 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3935 default: llvm_unreachable("unknown object format");
3936 case Triple::COFF:
3937 return LowerGlobalAddressWindows(Op, DAG);
3938 case Triple::ELF:
3939 return LowerGlobalAddressELF(Op, DAG);
3940 case Triple::MachO:
3941 return LowerGlobalAddressDarwin(Op, DAG);
3942 }
3943}
3944
3945SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3946 SelectionDAG &DAG) const {
3947 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3948 SDLoc dl(Op);
3949 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3950 bool IsRO = isReadOnly(GV);
3951
3952 // promoteToConstantPool only if not generating XO text section
3953 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3954 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3955 return V;
3956
3957 if (isPositionIndependent()) {
3959 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3960 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3961 if (!GV->isDSOLocal())
3962 Result =
3963 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3965 return Result;
3966 } else if (Subtarget->isROPI() && IsRO) {
3967 // PC-relative.
3968 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3969 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3970 return Result;
3971 } else if (Subtarget->isRWPI() && !IsRO) {
3972 // SB-relative.
3973 SDValue RelAddr;
3974 if (Subtarget->useMovt()) {
3975 ++NumMovwMovt;
3976 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3977 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3978 } else { // use literal pool for address constant
3981 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3982 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3983 RelAddr = DAG.getLoad(
3984 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3986 }
3987 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3988 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3989 return Result;
3990 }
3991
3992 // If we have T2 ops, we can materialize the address directly via movt/movw
3993 // pair. This is always cheaper. If we need to generate Execute Only code, and we
3994 // only have Thumb1 available, we can't use a constant pool and are forced to
3995 // use immediate relocations.
3996 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3997 if (Subtarget->useMovt())
3998 ++NumMovwMovt;
3999 // FIXME: Once remat is capable of dealing with instructions with register
4000 // operands, expand this into two nodes.
4001 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
4002 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
4003 } else {
4004 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
4005 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4006 return DAG.getLoad(
4007 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4009 }
4010}
4011
4012SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
4013 SelectionDAG &DAG) const {
4014 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4015 "ROPI/RWPI not currently supported for Darwin");
4016 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4017 SDLoc dl(Op);
4018 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4019
4020 if (Subtarget->useMovt())
4021 ++NumMovwMovt;
4022
4023 // FIXME: Once remat is capable of dealing with instructions with register
4024 // operands, expand this into multiple nodes
4025 unsigned Wrapper =
4027
4028 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
4029 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
4030
4031 if (Subtarget->isGVIndirectSymbol(GV))
4032 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4034 return Result;
4035}
4036
4037SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4038 SelectionDAG &DAG) const {
4039 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4040 assert(Subtarget->useMovt() &&
4041 "Windows on ARM expects to use movw/movt");
4042 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4043 "ROPI/RWPI not currently supported for Windows");
4044
4046 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4047 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4048 if (GV->hasDLLImportStorageClass())
4049 TargetFlags = ARMII::MO_DLLIMPORT;
4050 else if (!TM.shouldAssumeDSOLocal(GV))
4051 TargetFlags = ARMII::MO_COFFSTUB;
4052 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4054 SDLoc DL(Op);
4055
4056 ++NumMovwMovt;
4057
4058 // FIXME: Once remat is capable of dealing with instructions with register
4059 // operands, expand this into two nodes.
4060 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4061 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4062 TargetFlags));
4063 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4064 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4066 return Result;
4067}
4068
4069SDValue
4070ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4071 SDLoc dl(Op);
4072 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4073 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4074 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4075 Op.getOperand(1), Val);
4076}
4077
4078SDValue
4079ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4080 SDLoc dl(Op);
4081 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4082 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4083}
4084
4085SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4086 SelectionDAG &DAG) const {
4087 SDLoc dl(Op);
4088 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4089 Op.getOperand(0));
4090}
4091
4092SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4093 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4094 unsigned IntNo =
4095 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4096 switch (IntNo) {
4097 default:
4098 return SDValue(); // Don't custom lower most intrinsics.
4099 case Intrinsic::arm_gnu_eabi_mcount: {
4101 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4102 SDLoc dl(Op);
4103 SDValue Chain = Op.getOperand(0);
4104 // call "\01__gnu_mcount_nc"
4105 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4106 const uint32_t *Mask =
4108 assert(Mask && "Missing call preserved mask for calling convention");
4109 // Mark LR an implicit live-in.
4110 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4111 SDValue ReturnAddress =
4112 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4113 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4114 SDValue Callee =
4115 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4117 if (Subtarget->isThumb())
4118 return SDValue(
4119 DAG.getMachineNode(
4120 ARM::tBL_PUSHLR, dl, ResultTys,
4121 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4122 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4123 0);
4124 return SDValue(
4125 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4126 {ReturnAddress, Callee, RegisterMask, Chain}),
4127 0);
4128 }
4129 }
4130}
4131
4132SDValue
4133ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4134 const ARMSubtarget *Subtarget) const {
4135 unsigned IntNo = Op.getConstantOperandVal(0);
4136 SDLoc dl(Op);
4137 switch (IntNo) {
4138 default: return SDValue(); // Don't custom lower most intrinsics.
4139 case Intrinsic::thread_pointer: {
4140 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4141 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4142 }
4143 case Intrinsic::arm_cls: {
4144 const SDValue &Operand = Op.getOperand(1);
4145 const EVT VTy = Op.getValueType();
4146 SDValue SRA =
4147 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4148 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4149 SDValue SHL =
4150 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4151 SDValue OR =
4152 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4153 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4154 return Result;
4155 }
4156 case Intrinsic::arm_cls64: {
4157 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4158 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
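// For example, for x = 0xFFFFFFFFF0000000: cls(hi(x)) == 31 and hi(x) != 0, so
// the result is 31 + clz(not(0xF0000000)) = 31 + clz(0x0FFFFFFF) = 31 + 4 = 35.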
4159 const SDValue &Operand = Op.getOperand(1);
4160 const EVT VTy = Op.getValueType();
4161 SDValue Lo, Hi;
4162 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4163 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4164 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4165 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4166 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4167 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4168 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4169 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4170 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4171 SDValue CheckLo =
4172 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4173 SDValue HiIsZero =
4174 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4175 SDValue AdjustedLo =
4176 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4177 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4178 SDValue Result =
4179 DAG.getSelect(dl, VTy, CheckLo,
4180 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4181 return Result;
4182 }
4183 case Intrinsic::eh_sjlj_lsda: {
4186 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4187 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4188 SDValue CPAddr;
4189 bool IsPositionIndependent = isPositionIndependent();
4190 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4192 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4193 ARMCP::CPLSDA, PCAdj);
4194 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4195 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4196 SDValue Result = DAG.getLoad(
4197 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4199
4200 if (IsPositionIndependent) {
4201 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4202 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4203 }
4204 return Result;
4205 }
4206 case Intrinsic::arm_neon_vabs:
4207 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4208 Op.getOperand(1));
4209 case Intrinsic::arm_neon_vabds:
4210 if (Op.getValueType().isInteger())
4211 return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
4212 Op.getOperand(1), Op.getOperand(2));
4213 return SDValue();
4214 case Intrinsic::arm_neon_vabdu:
4215 return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
4216 Op.getOperand(1), Op.getOperand(2));
4217 case Intrinsic::arm_neon_vmulls:
4218 case Intrinsic::arm_neon_vmullu: {
4219 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4221 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4222 Op.getOperand(1), Op.getOperand(2));
4223 }
4224 case Intrinsic::arm_neon_vminnm:
4225 case Intrinsic::arm_neon_vmaxnm: {
4226 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4228 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4229 Op.getOperand(1), Op.getOperand(2));
4230 }
4231 case Intrinsic::arm_neon_vminu:
4232 case Intrinsic::arm_neon_vmaxu: {
4233 if (Op.getValueType().isFloatingPoint())
4234 return SDValue();
4235 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4236 ? ISD::UMIN : ISD::UMAX;
4237 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4238 Op.getOperand(1), Op.getOperand(2));
4239 }
4240 case Intrinsic::arm_neon_vmins:
4241 case Intrinsic::arm_neon_vmaxs: {
4242 // v{min,max}s is overloaded between signed integers and floats.
4243 if (!Op.getValueType().isFloatingPoint()) {
4244 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4245 ? ISD::SMIN : ISD::SMAX;
4246 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4247 Op.getOperand(1), Op.getOperand(2));
4248 }
4249 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4251 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4252 Op.getOperand(1), Op.getOperand(2));
4253 }
4254 case Intrinsic::arm_neon_vtbl1:
4255 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4256 Op.getOperand(1), Op.getOperand(2));
4257 case Intrinsic::arm_neon_vtbl2:
4258 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4259 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4260 case Intrinsic::arm_mve_pred_i2v:
4261 case Intrinsic::arm_mve_pred_v2i:
4262 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4263 Op.getOperand(1));
4264 case Intrinsic::arm_mve_vreinterpretq:
4265 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4266 Op.getOperand(1));
4267 case Intrinsic::arm_mve_lsll:
4268 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4269 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4270 case Intrinsic::arm_mve_asrl:
4271 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4272 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4273 }
4274}
4275
4277 const ARMSubtarget *Subtarget) {
4278 SDLoc dl(Op);
4279 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4280 if (SSID == SyncScope::SingleThread)
4281 return Op;
4282
4283 if (!Subtarget->hasDataBarrier()) {
4284 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4285 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4286 // here.
4287 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4288 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4289 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4290 DAG.getConstant(0, dl, MVT::i32));
4291 }
4292
4293 AtomicOrdering Ord =
4294 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4296 if (Subtarget->isMClass()) {
4297 // Only a full system barrier exists in the M-class architectures.
4299 } else if (Subtarget->preferISHSTBarriers() &&
4300 Ord == AtomicOrdering::Release) {
4301 // Swift happens to implement ISHST barriers in a way that's compatible with
4302 // Release semantics but weaker than ISH so we'd be fools not to use
4303 // it. Beware: other processors probably don't!
4305 }
4306
4307 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4308 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4309 DAG.getConstant(Domain, dl, MVT::i32));
4310}
4311
4313 const ARMSubtarget *Subtarget) {
4314 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4315 if (!(Subtarget->isThumb2() ||
4316 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4317 // Just preserve the chain.
4318 return Op.getOperand(0);
4319
4320 SDLoc dl(Op);
4321 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4322 if (!isRead &&
4323 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4324 // ARMv7 with MP extension has PLDW.
4325 return Op.getOperand(0);
4326
4327 unsigned isData = Op.getConstantOperandVal(4);
4328 if (Subtarget->isThumb()) {
4329 // Invert the bits.
4330 isRead = ~isRead & 1;
4331 isData = ~isData & 1;
4332 }
4333
4334 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4335 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4336 DAG.getConstant(isData, dl, MVT::i32));
4337}
4338
4341 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4342
4343 // vastart just stores the address of the VarArgsFrameIndex slot into the
4344 // memory location argument.
4345 SDLoc dl(Op);
4347 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4348 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4349 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4350 MachinePointerInfo(SV));
4351}
4352
4353SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4354 CCValAssign &NextVA,
4355 SDValue &Root,
4356 SelectionDAG &DAG,
4357 const SDLoc &dl) const {
4360
4361 const TargetRegisterClass *RC;
4362 if (AFI->isThumb1OnlyFunction())
4363 RC = &ARM::tGPRRegClass;
4364 else
4365 RC = &ARM::GPRRegClass;
4366
4367 // Transform the arguments stored in physical registers into virtual ones.
4368 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4369 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4370
4371 SDValue ArgValue2;
4372 if (NextVA.isMemLoc()) {
4373 MachineFrameInfo &MFI = MF.getFrameInfo();
4374 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4375
4376 // Create load node to retrieve arguments from the stack.
4377 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4378 ArgValue2 = DAG.getLoad(
4379 MVT::i32, dl, Root, FIN,
4381 } else {
4382 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4383 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4384 }
4385 if (!Subtarget->isLittle())
4386 std::swap (ArgValue, ArgValue2);
4387 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4388}
4389
4390// The remaining GPRs hold either the beginning of variable-argument
4391// data, or the beginning of an aggregate passed by value (usually
4392// byval). Either way, we allocate stack slots adjacent to the data
4393// provided by our caller, and store the unallocated registers there.
4394// If this is a variadic function, the va_list pointer will begin with
4395// these values; otherwise, this reassembles a (byval) structure that
4396// was split between registers and memory.
4397 // Return: The frame index the registers were stored into.
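// For example, if a byval argument was split so that r2 and r3 hold its first
// eight bytes, those two registers are stored to stack slots immediately below
// the caller-provided portion, making the whole argument contiguous in memory.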
4398int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4399 const SDLoc &dl, SDValue &Chain,
4400 const Value *OrigArg,
4401 unsigned InRegsParamRecordIdx,
4402 int ArgOffset, unsigned ArgSize) const {
4403 // Currently, two use-cases are possible:
4404 // Case #1. Non-var-args function, and we meet the first byval parameter.
4405 // Set up the first unallocated register as the first byval register;
4406 // eat all remaining registers
4407 // (these two actions are performed by the HandleByVal method).
4408 // Then, here, we initialize the stack frame with
4409 // "store-reg" instructions.
4410 // Case #2. Var-args function that doesn't contain byval parameters.
4411 // The same: eat all remaining unallocated registers and
4412 // initialize the stack frame.
4413
4415 MachineFrameInfo &MFI = MF.getFrameInfo();
4417 unsigned RBegin, REnd;
4418 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4419 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4420 } else {
4421 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4422 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4423 REnd = ARM::R4;
4424 }
4425
4426 if (REnd != RBegin)
4427 ArgOffset = -4 * (ARM::R4 - RBegin);
4428
4429 auto PtrVT = getPointerTy(DAG.getDataLayout());
4430 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4431 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4432
4434 const TargetRegisterClass *RC =
4435 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4436
4437 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4438 Register VReg = MF.addLiveIn(Reg, RC);
4439 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4440 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4441 MachinePointerInfo(OrigArg, 4 * i));
4442 MemOps.push_back(Store);
4443 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4444 }
4445
4446 if (!MemOps.empty())
4447 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4448 return FrameIndex;
4449}
4450
4451 // Set up the stack frame that the va_list pointer will start from.
4452void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4453 const SDLoc &dl, SDValue &Chain,
4454 unsigned ArgOffset,
4455 unsigned TotalArgRegsSaveSize,
4456 bool ForceMutable) const {
4459
4460 // Try to store any remaining integer argument regs
4461 // to their spots on the stack so that they may be loaded by dereferencing
4462 // the result of va_next.
4463 // If there are no regs to be stored, just point the address after the last
4464 // argument passed via the stack.
4465 int FrameIndex = StoreByValRegs(
4466 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4467 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4468 AFI->setVarArgsFrameIndex(FrameIndex);
4469}
4470
4471bool ARMTargetLowering::splitValueIntoRegisterParts(
4472 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4473 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4474 EVT ValueVT = Val.getValueType();
4475 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4476 unsigned ValueBits = ValueVT.getSizeInBits();
4477 unsigned PartBits = PartVT.getSizeInBits();
4478 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4479 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4480 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4481 Parts[0] = Val;
4482 return true;
4483 }
4484 return false;
4485}
4486
4487SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4488 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4489 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4490 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4491 unsigned ValueBits = ValueVT.getSizeInBits();
4492 unsigned PartBits = PartVT.getSizeInBits();
4493 SDValue Val = Parts[0];
4494
4495 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4496 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4497 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4498 return Val;
4499 }
4500 return SDValue();
4501}
4502
4503SDValue ARMTargetLowering::LowerFormalArguments(
4504 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4505 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4506 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4508 MachineFrameInfo &MFI = MF.getFrameInfo();
4509
4511
4512 // Assign locations to all of the incoming arguments.
4514 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4515 *DAG.getContext());
4516 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4517
4519 unsigned CurArgIdx = 0;
4520
4521 // Initially ArgRegsSaveSize is zero.
4522 // Then we increase this value each time we meet a byval parameter.
4523 // We also increase this value in the case of a varargs function.
4524 AFI->setArgRegsSaveSize(0);
4525
4526 // Calculate the amount of stack space that we need to allocate to store
4527 // byval and variadic arguments that are passed in registers.
4528 // We need to know this before we allocate the first byval or variadic
4529 // argument, as they will be allocated a stack slot below the CFA (Canonical
4530 // Frame Address, the stack pointer at entry to the function).
4531 unsigned ArgRegBegin = ARM::R4;
4532 for (const CCValAssign &VA : ArgLocs) {
4533 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4534 break;
4535
4536 unsigned Index = VA.getValNo();
4537 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4538 if (!Flags.isByVal())
4539 continue;
4540
4541 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4542 unsigned RBegin, REnd;
4543 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4544 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4545
4546 CCInfo.nextInRegsParam();
4547 }
4548 CCInfo.rewindByValRegsInfo();
4549
4550 int lastInsIndex = -1;
4551 if (isVarArg && MFI.hasVAStart()) {
4552 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4553 if (RegIdx != std::size(GPRArgRegs))
4554 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4555 }
4556
4557 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4558 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4559 auto PtrVT = getPointerTy(DAG.getDataLayout());
4560
4561 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4562 CCValAssign &VA = ArgLocs[i];
4563 if (Ins[VA.getValNo()].isOrigArg()) {
4564 std::advance(CurOrigArg,
4565 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4566 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4567 }
4568 // Arguments stored in registers.
4569 if (VA.isRegLoc()) {
4570 EVT RegVT = VA.getLocVT();
4571 SDValue ArgValue;
4572
4573 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4574 // f64 and vector types are split up into multiple registers or
4575 // combinations of registers and stack slots.
4576 SDValue ArgValue1 =
4577 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4578 VA = ArgLocs[++i]; // skip ahead to next loc
4579 SDValue ArgValue2;
4580 if (VA.isMemLoc()) {
4581 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4582 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4583 ArgValue2 = DAG.getLoad(
4584 MVT::f64, dl, Chain, FIN,
4586 } else {
4587 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4588 }
4589 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4590 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4591 ArgValue1, DAG.getIntPtrConstant(0, dl));
4592 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4593 ArgValue2, DAG.getIntPtrConstant(1, dl));
4594 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4595 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4596 } else {
4597 const TargetRegisterClass *RC;
4598
4599 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4600 RC = &ARM::HPRRegClass;
4601 else if (RegVT == MVT::f32)
4602 RC = &ARM::SPRRegClass;
4603 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4604 RegVT == MVT::v4bf16)
4605 RC = &ARM::DPRRegClass;
4606 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4607 RegVT == MVT::v8bf16)
4608 RC = &ARM::QPRRegClass;
4609 else if (RegVT == MVT::i32)
4610 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4611 : &ARM::GPRRegClass;
4612 else
4613 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4614
4615 // Transform the arguments in physical registers into virtual ones.
4616 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4617 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4618
4619 // If this value is passed in r0 and has the returned attribute (e.g.
4620 // C++ 'structors), record this fact for later use.
4621 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4622 AFI->setPreservesR0();
4623 }
4624 }
4625
4626 // If this is an 8 or 16-bit value, it is really passed promoted
4627 // to 32 bits. Insert an assert[sz]ext to capture this, then
4628 // truncate to the right size.
4629 switch (VA.getLocInfo()) {
4630 default: llvm_unreachable("Unknown loc info!");
4631 case CCValAssign::Full: break;
4632 case CCValAssign::BCvt:
4633 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4634 break;
4635 }
4636
4637 // f16 arguments have their size extended to 4 bytes and are passed as if they
4638 // had been copied to the LSBs of a 32-bit register.
4639 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
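// For example, a half argument arrives as the low 16 bits of an i32 (e.g. in
// r0) or of an f32 (e.g. in s0); MoveToHPR below extracts it into a proper
// f16 value.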
4640 if (VA.needsCustom() &&
4641 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4642 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4643
4644 // On CMSE Entry Functions, formal integer arguments whose bitwidth is
4645 // less than 32 bits must be sign- or zero-extended in the callee for
4646 // security reasons. Although the ABI mandates an extension done by the
4647 // caller, the latter cannot be trusted to follow the rules of the ABI.
4648 const ISD::InputArg &Arg = Ins[VA.getValNo()];
4649 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4650 RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
4651 ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);
4652
4653 InVals.push_back(ArgValue);
4654 } else { // VA.isRegLoc()
4655 // Only arguments passed on the stack should make it here.
4656 assert(VA.isMemLoc());
4657 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4658
4659 int index = VA.getValNo();
4660
4661 // Some Ins[] entries become multiple ArgLoc[] entries.
4662 // Process them only once.
4663 if (index != lastInsIndex)
4664 {
4665 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4666 // FIXME: For now, all byval parameter objects are marked mutable.
4667 // This can be changed with more analysis.
4668 // In case of tail call optimization, mark all arguments mutable, since
4669 // they could be overwritten by the lowering of arguments in case of
4670 // a tail call.
4671 if (Flags.isByVal()) {
4672 assert(Ins[index].isOrigArg() &&
4673 "Byval arguments cannot be implicit");
4674 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4675
4676 int FrameIndex = StoreByValRegs(
4677 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4678 VA.getLocMemOffset(), Flags.getByValSize());
4679 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4680 CCInfo.nextInRegsParam();
4681 } else {
4682 unsigned FIOffset = VA.getLocMemOffset();
4683 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4684 FIOffset, true);
4685
4686 // Create load nodes to retrieve arguments from the stack.
4687 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4688 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4690 DAG.getMachineFunction(), FI)));
4691 }
4692 lastInsIndex = index;
4693 }
4694 }
4695 }
4696
4697 // varargs
4698 if (isVarArg && MFI.hasVAStart()) {
4699 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4700 TotalArgRegsSaveSize);
4701 if (AFI->isCmseNSEntryFunction()) {
4704 "secure entry function must not be variadic", dl.getDebugLoc());
4705 DAG.getContext()->diagnose(Diag);
4706 }
4707 }
4708
4709 unsigned StackArgSize = CCInfo.getStackSize();
4710 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4711 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4712 // The only way to guarantee a tail call is if the callee restores its
4713 // argument area, but it must also keep the stack aligned when doing so.
4714 const DataLayout &DL = DAG.getDataLayout();
4715 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4716
4717 AFI->setArgumentStackToRestore(StackArgSize);
4718 }
4719 AFI->setArgumentStackSize(StackArgSize);
4720
4721 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4724 "secure entry function requires arguments on stack", dl.getDebugLoc());
4725 DAG.getContext()->diagnose(Diag);
4726 }
4727
4728 return Chain;
4729}
4730
4731/// isFloatingPointZero - Return true if this is +0.0.
4733 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4734 return CFP->getValueAPF().isPosZero();
4735 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4736 // Maybe this has already been legalized into the constant pool?
4737 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4738 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4739 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4740 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4741 return CFP->getValueAPF().isPosZero();
4742 }
4743 } else if (Op->getOpcode() == ISD::BITCAST &&
4744 Op->getValueType(0) == MVT::f64) {
4745 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4746 // created by LowerConstantFP().
4747 SDValue BitcastOp = Op->getOperand(0);
4748 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4749 isNullConstant(BitcastOp->getOperand(0)))
4750 return true;
4751 }
4752 return false;
4753}
4754
4755/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4756/// the given operands.
4757SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4758 SDValue &ARMcc, SelectionDAG &DAG,
4759 const SDLoc &dl) const {
4760 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4761 unsigned C = RHSC->getZExtValue();
4762 if (!isLegalICmpImmediate((int32_t)C)) {
4763 // Constant does not fit, try adjusting it by one.
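// For example, on Thumb1 "x <u 256" cannot use cmp with #256, but the
// equivalent "x <=u 255" can, so we rewrite both the condition and the
// constant.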
4764 switch (CC) {
4765 default: break;
4766 case ISD::SETLT:
4767 case ISD::SETGE:
4768 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4770 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4771 }
4772 break;
4773 case ISD::SETULT:
4774 case ISD::SETUGE:
4775 if (C != 0 && isLegalICmpImmediate(C-1)) {
4777 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4778 }
4779 break;
4780 case ISD::SETLE:
4781 case ISD::SETGT:
4782 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4784 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4785 }
4786 break;
4787 case ISD::SETULE:
4788 case ISD::SETUGT:
4789 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4791 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4792 }
4793 break;
4794 }
4795 }
4796 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4798 // In ARM and Thumb-2, the compare instructions can shift their second
4799 // operand.
4801 std::swap(LHS, RHS);
4802 }
4803
4804 // Thumb1 has very limited immediate modes, so turning an "and" into a
4805 // shift can save multiple instructions.
4806 //
4807 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4808 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4809 // own. If it's the operand to an unsigned comparison with an immediate,
4810 // we can eliminate one of the shifts: we transform
4811 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4812 //
4813 // We avoid transforming cases which aren't profitable due to encoding
4814 // details:
4815 //
4816 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4817 // would not; in that case, we're essentially trading one immediate load for
4818 // another.
4819 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4820 // 3. C2 is zero; we have other code for this special case.
4821 //
4822 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4823 // instruction, since the AND is always one instruction anyway, but we could
4824 // use narrow instructions in some cases.
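// For example, with C1 = 0x00FFFFFF and C2 = 0x300, the comparison
// "(x & 0x00FFFFFF) == 0x300" becomes "(x << 8) == 0x30000", avoiding the
// need to materialize the 0x00FFFFFF mask in a register.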
4825 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4826 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4827 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4828 !isSignedIntSetCC(CC)) {
4829 unsigned Mask = LHS.getConstantOperandVal(1);
4830 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4831 uint64_t RHSV = RHSC->getZExtValue();
4832 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4833 unsigned ShiftBits = llvm::countl_zero(Mask);
4834 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4835 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4836 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4837 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4838 }
4839 }
4840 }
4841
4842 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4843 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4844 // way a cmp would.
4845 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4846 // some tweaks to the heuristics for the previous and->shift transform.
4847 // FIXME: Optimize cases where the LHS isn't a shift.
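// For example, "(x << 3) > 0x80000000U" becomes "lsls rN, x, #4" followed by a
// HI condition: the resulting C and Z flags encode exactly the same comparison.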
4848 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4849 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4850 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4851 LHS.getConstantOperandVal(1) < 31) {
4852 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4853 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4854 DAG.getVTList(MVT::i32, MVT::i32),
4855 LHS.getOperand(0),
4856 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4857 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4858 Shift.getValue(1), SDValue());
4859 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4860 return Chain.getValue(1);
4861 }
4862
4864
4865 // If the RHS is a constant zero then the V (overflow) flag will never be
4866 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4867 // simpler for other passes (like the peephole optimiser) to deal with.
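// For example, "x >= 0" normally tests N == V, but with V known to be clear it
// reduces to N == 0, i.e. the PL condition.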
4868 if (isNullConstant(RHS)) {
4869 switch (CondCode) {
4870 default: break;
4871 case ARMCC::GE:
4873 break;
4874 case ARMCC::LT:
4876 break;
4877 }
4878 }
4879
4880 ARMISD::NodeType CompareType;
4881 switch (CondCode) {
4882 default:
4883 CompareType = ARMISD::CMP;
4884 break;
4885 case ARMCC::EQ:
4886 case ARMCC::NE:
4887 // Uses only Z Flag
4888 CompareType = ARMISD::CMPZ;
4889 break;
4890 }
4891 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4892 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4893}
4894
4895 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4896SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4897 SelectionDAG &DAG, const SDLoc &dl,
4898 bool Signaling) const {
4899 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4900 SDValue Cmp;
4901 if (!isFloatingPointZero(RHS))
4902 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4903 dl, MVT::Glue, LHS, RHS);
4904 else
4905 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4906 dl, MVT::Glue, LHS);
4907 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4908}
4909
4910/// duplicateCmp - Glue values can have only one use, so this function
4911/// duplicates a comparison node.
4912SDValue
4913ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4914 unsigned Opc = Cmp.getOpcode();
4915 SDLoc DL(Cmp);
4916 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4917 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4918
4919 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4920 Cmp = Cmp.getOperand(0);
4921 Opc = Cmp.getOpcode();
4922 if (Opc == ARMISD::CMPFP)
4923 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4924 else {
4925 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4926 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4927 }
4928 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4929}
4930
4931// This function returns three things: the arithmetic computation itself
4932// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4933// comparison and the condition code define the case in which the arithmetic
4934// computation *does not* overflow.
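// For example, for ISD::UADDO the pair is (ADDC(LHS, RHS), CMP(Value, LHS))
// with condition HS: the unsigned add did not overflow exactly when the result
// is unsigned-greater-or-equal to LHS.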
4935std::pair<SDValue, SDValue>
4936ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4937 SDValue &ARMcc) const {
4938 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4939
4940 SDValue Value, OverflowCmp;
4941 SDValue LHS = Op.getOperand(0);
4942 SDValue RHS = Op.getOperand(1);
4943 SDLoc dl(Op);
4944
4945 // FIXME: We are currently always generating CMPs because we don't support
4946 // generating CMN through the backend. This is not as good as the natural
4947 // CMP case because it causes a register dependency and cannot be folded
4948 // later.
4949
4950 switch (Op.getOpcode()) {
4951 default:
4952 llvm_unreachable("Unknown overflow instruction!");
4953 case ISD::SADDO:
4954 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4955 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4956 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4957 break;
4958 case ISD::UADDO:
4959 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4960 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4961 // We do not use it in the USUBO case as Value may not be used.
4962 Value = DAG.getNode(ARMISD::ADDC, dl,
4963 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4964 .getValue(0);
4965 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4966 break;
4967 case ISD::SSUBO:
4968 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4969 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4970 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4971 break;
4972 case ISD::USUBO:
4973 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4974 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4975 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4976 break;
4977 case ISD::UMULO:
4978 // We generate a UMUL_LOHI and then check if the high word is 0.
4979 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4980 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4981 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4982 LHS, RHS);
4983 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4984 DAG.getConstant(0, dl, MVT::i32));
4985 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4986 break;
4987 case ISD::SMULO:
4988 // We generate a SMUL_LOHI and then check if all the bits of the high word
4989 // are the same as the sign bit of the low word.
4990 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4991 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4992 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4993 LHS, RHS);
4994 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4995 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4996 Value.getValue(0),
4997 DAG.getConstant(31, dl, MVT::i32)));
4998 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4999 break;
5000 } // switch (...)
5001
5002 return std::make_pair(Value, OverflowCmp);
5003}
5004
5005SDValue
5006ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
5007 // Let legalize expand this if it isn't a legal type yet.
5008 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5009 return SDValue();
5010
5011 SDValue Value, OverflowCmp;
5012 SDValue ARMcc;
5013 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
5014 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5015 SDLoc dl(Op);
5016 // We use 0 and 1 as false and true values.
5017 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
5018 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
5019 EVT VT = Op.getValueType();
5020
5021 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
5022 ARMcc, CCR, OverflowCmp);
5023
5024 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
5025 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5026}
5027
5029 SelectionDAG &DAG) {
5030 SDLoc DL(BoolCarry);
5031 EVT CarryVT = BoolCarry.getValueType();
5032
5033 // This converts the boolean value carry into the carry flag by doing
5034 // ARMISD::SUBC Carry, 1
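// A boolean carry of 1 yields 1 - 1 = 0 with no borrow (ARM carry flag set);
// a boolean carry of 0 yields 0 - 1, which borrows (carry flag clear),
// matching ARM's inverted-borrow convention.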
5035 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
5036 DAG.getVTList(CarryVT, MVT::i32),
5037 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5038 return Carry.getValue(1);
5039}
5040
5042 SelectionDAG &DAG) {
5043 SDLoc DL(Flags);
5044
5045 // Now convert the carry flag into a boolean carry. We do this
5046 // using ARMISD::ADDE 0, 0, Carry
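// ADDE computes 0 + 0 + C, so the result is the carry flag as a plain 0/1
// value.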
5047 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5048 DAG.getConstant(0, DL, MVT::i32),
5049 DAG.getConstant(0, DL, MVT::i32), Flags);
5050}
5051
5052SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5053 SelectionDAG &DAG) const {
5054 // Let legalize expand this if it isn't a legal type yet.
5055 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5056 return SDValue();
5057
5058 SDValue LHS = Op.getOperand(0);
5059 SDValue RHS = Op.getOperand(1);
5060 SDLoc dl(Op);
5061
5062 EVT VT = Op.getValueType();
5063 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5064 SDValue Value;
5065 SDValue Overflow;
5066 switch (Op.getOpcode()) {
5067 default:
5068 llvm_unreachable("Unknown overflow instruction!");
5069 case ISD::UADDO:
5070 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5071 // Convert the carry flag into a boolean value.
5072 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5073 break;
5074 case ISD::USUBO: {
5075 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5076 // Convert the carry flag into a boolean value.
5077 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5078 // ARMISD::SUBC returns 0 when we have to borrow, so turn it into an
5079 // overflow value by computing 1 - C.
5080 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5081 DAG.getConstant(1, dl, MVT::i32), Overflow);
5082 break;
5083 }
5084 }
5085
5086 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5087}
5088
5090 const ARMSubtarget *Subtarget) {
5091 EVT VT = Op.getValueType();
5092 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5093 return SDValue();
5094 if (!VT.isSimple())
5095 return SDValue();
5096
5097 unsigned NewOpcode;
5098 switch (VT.getSimpleVT().SimpleTy) {
5099 default:
5100 return SDValue();
5101 case MVT::i8:
5102 switch (Op->getOpcode()) {
5103 case ISD::UADDSAT:
5104 NewOpcode = ARMISD::UQADD8b;
5105 break;
5106 case ISD::SADDSAT:
5107 NewOpcode = ARMISD::QADD8b;
5108 break;
5109 case ISD::USUBSAT:
5110 NewOpcode = ARMISD::UQSUB8b;
5111 break;
5112 case ISD::SSUBSAT:
5113 NewOpcode = ARMISD::QSUB8b;
5114 break;
5115 }
5116 break;
5117 case MVT::i16:
5118 switch (Op->getOpcode()) {
5119 case ISD::UADDSAT:
5120 NewOpcode = ARMISD::UQADD16b;
5121 break;
5122 case ISD::SADDSAT:
5123 NewOpcode = ARMISD::QADD16b;
5124 break;
5125 case ISD::USUBSAT:
5126 NewOpcode = ARMISD::UQSUB16b;
5127 break;
5128 case ISD::SSUBSAT:
5129 NewOpcode = ARMISD::QSUB16b;
5130 break;
5131 }
5132 break;
5133 }
5134
5135 SDLoc dl(Op);
5136 SDValue Add =
5137 DAG.getNode(NewOpcode, dl, MVT::i32,
5138 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5139 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5140 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5141}
5142
5143SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5144 SDValue Cond = Op.getOperand(0);
5145 SDValue SelectTrue = Op.getOperand(1);
5146 SDValue SelectFalse = Op.getOperand(2);
5147 SDLoc dl(Op);
5148 unsigned Opc = Cond.getOpcode();
5149
5150 if (Cond.getResNo() == 1 &&
5151 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5152 Opc == ISD::USUBO)) {
5153 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5154 return SDValue();
5155
5156 SDValue Value, OverflowCmp;
5157 SDValue ARMcc;
5158 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5159 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5160 EVT VT = Op.getValueType();
5161
5162 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5163 OverflowCmp, DAG);
5164 }
5165
5166 // Convert:
5167 //
5168 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5169 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5170 //
5171 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5172 const ConstantSDNode *CMOVTrue =
5173 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5174 const ConstantSDNode *CMOVFalse =
5175 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5176
5177 if (CMOVTrue && CMOVFalse) {
5178 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5179 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5180
5181 SDValue True;
5182 SDValue False;
5183 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5184 True = SelectTrue;
5185 False = SelectFalse;
5186 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5187 True = SelectFalse;
5188 False = SelectTrue;
5189 }
5190
5191 if (True.getNode() && False.getNode()) {
5192 EVT VT = Op.getValueType();
5193 SDValue ARMcc = Cond.getOperand(2);
5194 SDValue CCR = Cond.getOperand(3);
5195 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5196 assert(True.getValueType() == VT);
5197 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5198 }
5199 }
5200 }
5201
5202 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5203 // undefined bits before doing a full-word comparison with zero.
5204 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5205 DAG.getConstant(1, dl, Cond.getValueType()));
5206
5207 return DAG.getSelectCC(dl, Cond,
5208 DAG.getConstant(0, dl, Cond.getValueType()),
5209 SelectTrue, SelectFalse, ISD::SETNE);
5210}
5211
5213 bool &swpCmpOps, bool &swpVselOps) {
5214 // Start by selecting the GE condition code for opcodes that return true for
5215 // 'equality'
5216 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5217 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5218 CondCode = ARMCC::GE;
5219
5220 // and GT for opcodes that return false for 'equality'.
5221 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5222 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5223 CondCode = ARMCC::GT;
5224
5225 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5226 // to swap the compare operands.
5227 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5228 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5229 swpCmpOps = true;
5230
5231 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5232 // If we have an unordered opcode, we need to swap the operands to the VSEL
5233 // instruction (effectively negating the condition).
5234 //
5235 // This also has the effect of swapping which one of 'less' or 'greater'
5236 // returns true, so we also swap the compare operands. It also switches
5237 // whether we return true for 'equality', so we compensate by picking the
5238 // opposite condition code to our original choice.
5239 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5240 CC == ISD::SETUGT) {
5241 swpCmpOps = !swpCmpOps;
5242 swpVselOps = !swpVselOps;
5243 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5244 }
5245
5246 // 'ordered' is 'anything but unordered', so use the VS condition code and
5247 // swap the VSEL operands.
5248 if (CC == ISD::SETO) {
5249 CondCode = ARMCC::VS;
5250 swpVselOps = true;
5251 }
5252
5253 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5254 // code and swap the VSEL operands. Also do this if we don't care about the
5255 // unordered case.
5256 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5257 CondCode = ARMCC::EQ;
5258 swpVselOps = true;
5259 }
5260}
5261
5262SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5263 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5264 SDValue Cmp, SelectionDAG &DAG) const {
5265 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5267 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5269 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5270
5271 SDValue TrueLow = TrueVal.getValue(0);
5272 SDValue TrueHigh = TrueVal.getValue(1);
5273 SDValue FalseLow = FalseVal.getValue(0);
5274 SDValue FalseHigh = FalseVal.getValue(1);
5275
5276 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5277 ARMcc, CCR, Cmp);
5278 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5279 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5280
5281 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5282 } else {
5283 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5284 Cmp);
5285 }
5286}
5287
5289 return CC == ISD::SETGT || CC == ISD::SETGE;
5290}
5291
5293 return CC == ISD::SETLT || CC == ISD::SETLE;
5294}
5295
5296// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5297// All of these conditions (and their <= and >= counterparts) will do:
5298// x < k ? k : x
5299// x > k ? x : k
5300// k < x ? x : k
5301// k > x ? k : x
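// For example, "x < 0 ? 0 : x" clamps x from below at k = 0.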
5302static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5303 const SDValue TrueVal, const SDValue FalseVal,
5304 const ISD::CondCode CC, const SDValue K) {
5305 return (isGTorGE(CC) &&
5306 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5307 (isLTorLE(CC) &&
5308 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5309}
5310
5311// Check if two chained conditionals could be converted into SSAT or USAT.
5312//
5313// SSAT can replace a set of two conditional selectors that bound a number to an
5314// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5315//
5316// x < -k ? -k : (x > k ? k : x)
5317// x < -k ? -k : (x < k ? x : k)
5318// x > -k ? (x > k ? k : x) : -k
5319// x < k ? (x < -k ? -k : x) : k
5320// etc.
5321//
5322// LLVM canonicalizes these to either a min(max()) or a max(min())
5323// pattern. This function tries to match one of these and will return a SSAT
5324// node if successful.
5325//
5326 // USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
5327// is a power of 2.
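// For example, min(max(x, -128), 127) (Val1 == ~Val2 with PosVal = 127) is
// lowered to a signed saturate to the 8-bit range, and min(max(x, 0), 255) to
// an unsigned saturate to the 8-bit range.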
5329 EVT VT = Op.getValueType();
5330 SDValue V1 = Op.getOperand(0);
5331 SDValue K1 = Op.getOperand(1);
5332 SDValue TrueVal1 = Op.getOperand(2);
5333 SDValue FalseVal1 = Op.getOperand(3);
5334 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5335
5336 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5337 if (Op2.getOpcode() != ISD::SELECT_CC)
5338 return SDValue();
5339
5340 SDValue V2 = Op2.getOperand(0);
5341 SDValue K2 = Op2.getOperand(1);
5342 SDValue TrueVal2 = Op2.getOperand(2);
5343 SDValue FalseVal2 = Op2.getOperand(3);
5344 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5345
5346 SDValue V1Tmp = V1;
5347 SDValue V2Tmp = V2;
5348
5349 // Check that the registers and the constants match a max(min()) or min(max())
5350 // pattern
5351 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5352 K2 != FalseVal2 ||
5353 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5354 return SDValue();
5355
5356 // Check that the constant in the lower-bound check is
5357 // the opposite of the constant in the upper-bound check
5358 // in 1's complement.
5359 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5360 return SDValue();
5361
5362 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5363 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5364 int64_t PosVal = std::max(Val1, Val2);
5365 int64_t NegVal = std::min(Val1, Val2);
5366
5367 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5368 !isPowerOf2_64(PosVal + 1))
5369 return SDValue();
5370
5371 // Handle the difference between USAT (unsigned) and SSAT (signed)
5372 // saturation
5373 // At this point, PosVal is guaranteed to be positive
5374 uint64_t K = PosVal;
5375 SDLoc dl(Op);
5376 if (Val1 == ~Val2)
5377 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5378 DAG.getConstant(llvm::countr_one(K), dl, VT));
5379 if (NegVal == 0)
5380 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5381 DAG.getConstant(llvm::countr_one(K), dl, VT));
5382
5383 return SDValue();
5384}
5385
5386// Check if a condition of the type x < k ? k : x can be converted into a
5387// bit operation instead of conditional moves.
5388// Currently this is allowed given:
5389// - The conditions and values match up
5390// - k is 0 or -1 (all ones)
5391 // This function will not check the last condition, that's up to the caller.
5392 // It returns true if the transformation can be made, and in that case
5393// returns x in V, and k in SatK.
5394 static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5395 SDValue &SatK)
5396{
5397 SDValue LHS = Op.getOperand(0);
5398 SDValue RHS = Op.getOperand(1);
5399 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5400 SDValue TrueVal = Op.getOperand(2);
5401 SDValue FalseVal = Op.getOperand(3);
5402
5403 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5404 ? &RHS
5405 : nullptr;
5406
5407 // No constant operation in comparison, early out
5408 if (!K)
5409 return false;
5410
5411 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5412 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5413 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5414
5415 // If the constant in the comparison does not match the constant in the
5416 // select, or the variable does not match, early out
5417 if (*K != KTmp || V != VTmp)
5418 return false;
5419
5420 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5421 SatK = *K;
5422 return true;
5423 }
5424
5425 return false;
5426}
5427
5428bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5429 if (VT == MVT::f32)
5430 return !Subtarget->hasVFP2Base();
5431 if (VT == MVT::f64)
5432 return !Subtarget->hasFP64();
5433 if (VT == MVT::f16)
5434 return !Subtarget->hasFullFP16();
5435 return false;
5436}
5437
5438SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5439 EVT VT = Op.getValueType();
5440 SDLoc dl(Op);
5441
5442 // Try to convert two saturating conditional selects into a single SSAT
5443 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5444 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5445 return SatValue;
5446
5447 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5448 // into more efficient bit operations, which is possible when k is 0 or -1
5449 // On ARM and Thumb-2 which have flexible operand 2 this will result in
5450 // single instructions. On Thumb the shift and the bit operation will be two
5451 // instructions.
5452 // Only allow this transformation on full-width (32-bit) operations
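// For illustration, with k = 0 the lowering below computes max(x, 0) as
// x & ~(x >> 31), and with k = -1 it computes max(x, -1) as x | (x >> 31),
// where ">>" is an arithmetic shift right.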
5453 SDValue LowerSatConstant;
5454 SDValue SatValue;
5455 if (VT == MVT::i32 &&
5456 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5457 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5458 DAG.getConstant(31, dl, VT));
5459 if (isNullConstant(LowerSatConstant)) {
5460 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5461 DAG.getAllOnesConstant(dl, VT));
5462 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5463 } else if (isAllOnesConstant(LowerSatConstant))
5464 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5465 }
5466
5467 SDValue LHS = Op.getOperand(0);
5468 SDValue RHS = Op.getOperand(1);
5469 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5470 SDValue TrueVal = Op.getOperand(2);
5471 SDValue FalseVal = Op.getOperand(3);
5472 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5473 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5474
5475 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5476 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5477 unsigned TVal = CTVal->getZExtValue();
5478 unsigned FVal = CFVal->getZExtValue();
5479 unsigned Opcode = 0;
5480
5481 if (TVal == ~FVal) {
5482 Opcode = ARMISD::CSINV;
5483 } else if (TVal == ~FVal + 1) {
5484 Opcode = ARMISD::CSNEG;
5485 } else if (TVal + 1 == FVal) {
5486 Opcode = ARMISD::CSINC;
5487 } else if (TVal == FVal + 1) {
5488 Opcode = ARMISD::CSINC;
5489 std::swap(TrueVal, FalseVal);
5490 std::swap(TVal, FVal);
5491 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5492 }
5493
5494 if (Opcode) {
5495 // If one of the constants is cheaper than another, materialise the
5496 // cheaper one and let the csel generate the other.
5497 if (Opcode != ARMISD::CSINC &&
5498 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5499 std::swap(TrueVal, FalseVal);
5500 std::swap(TVal, FVal);
5501 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5502 }
5503
5504 // Attempt to use ZR checking TVal is 0, possibly inverting the condition
5505 // to get there. CSINC is not invertible like the other two (~(~a) == a,
5506 // -(-a) == a, but (a+1)+1 != a).
5507 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5508 std::swap(TrueVal, FalseVal);
5509 std::swap(TVal, FVal);
5510 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5511 }
5512
5513 // Drops F's value because we can get it by inverting/negating TVal.
5514 FalseVal = TrueVal;
5515
5516 SDValue ARMcc;
5517 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5518 EVT VT = TrueVal.getValueType();
5519 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5520 }
5521 }
5522
5523 if (isUnsupportedFloatingType(LHS.getValueType())) {
5524 DAG.getTargetLoweringInfo().softenSetCCOperands(
5525 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5526
5527 // If softenSetCCOperands only returned one value, we should compare it to
5528 // zero.
5529 if (!RHS.getNode()) {
5530 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5531 CC = ISD::SETNE;
5532 }
5533 }
5534
5535 if (LHS.getValueType() == MVT::i32) {
5536 // Try to generate VSEL on ARMv8.
5537 // The VSEL instruction can't use all the usual ARM condition
5538 // codes: it only has two bits to select the condition code, so it's
5539 // constrained to use only GE, GT, VS and EQ.
5540 //
5541 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5542 // swap the operands of the previous compare instruction (effectively
5543 // inverting the compare condition, swapping 'less' and 'greater') and
5544 // sometimes need to swap the operands to the VSEL (which inverts the
5545 // condition in the sense of firing whenever the previous condition didn't)
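// Sketch of the idea: an i32 SETLT feeding an f32 select maps to ARMCC::LT,
// which VSEL cannot encode; inverting the condition to SETGE and swapping the
// selected values yields an equivalent VSELGE. The code below handles the
// general case.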
5546 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5547 TrueVal.getValueType() == MVT::f32 ||
5548 TrueVal.getValueType() == MVT::f64)) {
5549 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5550 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5551 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5552 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5553 std::swap(TrueVal, FalseVal);
5554 }
5555 }
5556
5557 SDValue ARMcc;
5558 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5559 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5560 // Choose GE over PL, which vsel does not support
5561 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5562 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5563 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5564 }
5565
5566 ARMCC::CondCodes CondCode, CondCode2;
5567 FPCCToARMCC(CC, CondCode, CondCode2);
5568
5569 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5570 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5571 // must use VSEL (limited condition codes), due to not having conditional f16
5572 // moves.
5573 if (Subtarget->hasFPARMv8Base() &&
5574 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5575 (TrueVal.getValueType() == MVT::f16 ||
5576 TrueVal.getValueType() == MVT::f32 ||
5577 TrueVal.getValueType() == MVT::f64)) {
5578 bool swpCmpOps = false;
5579 bool swpVselOps = false;
5580 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5581
5582 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5583 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5584 if (swpCmpOps)
5585 std::swap(LHS, RHS);
5586 if (swpVselOps)
5587 std::swap(TrueVal, FalseVal);
5588 }
5589 }
5590
5591 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5592 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5593 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5594 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5595 if (CondCode2 != ARMCC::AL) {
5596 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5597 // FIXME: Needs another CMP because flag can have but one use.
5598 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5599 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5600 }
5601 return Result;
5602}
5603
5604/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5605/// to morph to an integer compare sequence.
5606static bool canChangeToInt(SDValue Op, bool &SeenZero,
5607 const ARMSubtarget *Subtarget) {
5608 SDNode *N = Op.getNode();
5609 if (!N->hasOneUse())
5610 // Otherwise it requires moving the value from fp to integer registers.
5611 return false;
5612 if (!N->getNumValues())
5613 return false;
5614 EVT VT = Op.getValueType();
5615 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5616 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5617 // vmrs are very slow, e.g. cortex-a8.
5618 return false;
5619
5620 if (isFloatingPointZero(Op)) {
5621 SeenZero = true;
5622 return true;
5623 }
5624 return ISD::isNormalLoad(N);
5625}
5626
5627 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5628 if (isFloatingPointZero(Op))
5629 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5630
5631 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5632 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5633 Ld->getPointerInfo(), Ld->getAlign(),
5634 Ld->getMemOperand()->getFlags());
5635
5636 llvm_unreachable("Unknown VFP cmp argument!");
5637}
5638
5639 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5640 SDValue &RetVal1, SDValue &RetVal2) {
5641 SDLoc dl(Op);
5642
5643 if (isFloatingPointZero(Op)) {
5644 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5645 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5646 return;
5647 }
5648
5649 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5650 SDValue Ptr = Ld->getBasePtr();
5651 RetVal1 =
5652 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5653 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5654
5655 EVT PtrType = Ptr.getValueType();
5656 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5657 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5658 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5659 Ld->getPointerInfo().getWithOffset(4),
5660 commonAlignment(Ld->getAlign(), 4),
5661 Ld->getMemOperand()->getFlags());
5662 return;
5663 }
5664
5665 llvm_unreachable("Unknown VFP cmp argument!");
5666}
5667
5668/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5669/// f32 and even f64 comparisons to integer ones.
5670SDValue
5671ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5672 SDValue Chain = Op.getOperand(0);
5673 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5674 SDValue LHS = Op.getOperand(2);
5675 SDValue RHS = Op.getOperand(3);
5676 SDValue Dest = Op.getOperand(4);
5677 SDLoc dl(Op);
5678
5679 bool LHSSeenZero = false;
5680 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5681 bool RHSSeenZero = false;
5682 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5683 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5684 // If unsafe fp math optimization is enabled and there are no other uses of
5685 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5686 // to an integer comparison.
5687 if (CC == ISD::SETOEQ)
5688 CC = ISD::SETEQ;
5689 else if (CC == ISD::SETUNE)
5690 CC = ISD::SETNE;
5691
5692 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5693 SDValue ARMcc;
5694 if (LHS.getValueType() == MVT::f32) {
5695 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5696 bitcastf32Toi32(LHS, DAG), Mask);
5697 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5698 bitcastf32Toi32(RHS, DAG), Mask);
5699 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5700 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5701 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5702 Chain, Dest, ARMcc, CCR, Cmp);
5703 }
5704
5705 SDValue LHS1, LHS2;
5706 SDValue RHS1, RHS2;
5707 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5708 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5709 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5710 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5711 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5712 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5713 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5714 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5715 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5716 }
5717
5718 return SDValue();
5719}
5720
5721SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5722 SDValue Chain = Op.getOperand(0);
5723 SDValue Cond = Op.getOperand(1);
5724 SDValue Dest = Op.getOperand(2);
5725 SDLoc dl(Op);
5726
5727 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5728 // instruction.
5729 unsigned Opc = Cond.getOpcode();
5730 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5731 !Subtarget->isThumb1Only();
5732 if (Cond.getResNo() == 1 &&
5733 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5734 Opc == ISD::USUBO || OptimizeMul)) {
5735 // Only lower legal XALUO ops.
5736 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5737 return SDValue();
5738
5739 // The actual operation with overflow check.
5740 SDValue Value, OverflowCmp;
5741 SDValue ARMcc;
5742 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5743
5744 // Reverse the condition code.
5745 ARMCC::CondCodes CondCode =
5746 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5747 CondCode = ARMCC::getOppositeCondition(CondCode);
5748 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5749 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5750
5751 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5752 OverflowCmp);
5753 }
5754
5755 return SDValue();
5756}
5757
5758SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5759 SDValue Chain = Op.getOperand(0);
5760 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5761 SDValue LHS = Op.getOperand(2);
5762 SDValue RHS = Op.getOperand(3);
5763 SDValue Dest = Op.getOperand(4);
5764 SDLoc dl(Op);
5765
5766 if (isUnsupportedFloatingType(LHS.getValueType())) {
5767 DAG.getTargetLoweringInfo().softenSetCCOperands(
5768 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5769
5770 // If softenSetCCOperands only returned one value, we should compare it to
5771 // zero.
5772 if (!RHS.getNode()) {
5773 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5774 CC = ISD::SETNE;
5775 }
5776 }
5777
5778 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5779 // instruction.
5780 unsigned Opc = LHS.getOpcode();
5781 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5782 !Subtarget->isThumb1Only();
5783 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5784 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5785 Opc == ISD::USUBO || OptimizeMul) &&
5786 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5787 // Only lower legal XALUO ops.
5788 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5789 return SDValue();
5790
5791 // The actual operation with overflow check.
5792 SDValue Value, OverflowCmp;
5793 SDValue ARMcc;
5794 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5795
5796 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5797 // Reverse the condition code.
5798 ARMCC::CondCodes CondCode =
5799 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5800 CondCode = ARMCC::getOppositeCondition(CondCode);
5801 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5802 }
5803 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5804
5805 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5806 OverflowCmp);
5807 }
5808
5809 if (LHS.getValueType() == MVT::i32) {
5810 SDValue ARMcc;
5811 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5812 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5813 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5814 Chain, Dest, ARMcc, CCR, Cmp);
5815 }
5816
5817 if (getTargetMachine().Options.UnsafeFPMath &&
5818 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5819 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5820 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5821 return Result;
5822 }
5823
5824 ARMCC::CondCodes CondCode, CondCode2;
5825 FPCCToARMCC(CC, CondCode, CondCode2);
5826
5827 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5828 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5829 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5830 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5831 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5832 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5833 if (CondCode2 != ARMCC::AL) {
5834 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5835 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5836 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5837 }
5838 return Res;
5839}
5840
5841SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5842 SDValue Chain = Op.getOperand(0);
5843 SDValue Table = Op.getOperand(1);
5844 SDValue Index = Op.getOperand(2);
5845 SDLoc dl(Op);
5846
5847 EVT PTy = getPointerTy(DAG.getDataLayout());
5848 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5849 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5850 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5851 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5852 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5853 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5854 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
5855 // table, which does another jump to the destination. This also makes it
5856 // easier to translate to TBB / TBH later (Thumb2 only).
5857 // FIXME: This might not work if the function is extremely large.
5858 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5859 Addr, Op.getOperand(2), JTI);
5860 }
5861 if (isPositionIndependent() || Subtarget->isROPI()) {
5862 Addr =
5863 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5864 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5865 Chain = Addr.getValue(1);
5866 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5867 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5868 } else {
5869 Addr =
5870 DAG.getLoad(PTy, dl, Chain, Addr,
5871 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5872 Chain = Addr.getValue(1);
5873 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5874 }
5875}
5876
5877 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5878 EVT VT = Op.getValueType();
5879 SDLoc dl(Op);
5880
5881 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5882 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5883 return Op;
5884 return DAG.UnrollVectorOp(Op.getNode());
5885 }
5886
5887 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5888
5889 EVT NewTy;
5890 const EVT OpTy = Op.getOperand(0).getValueType();
5891 if (OpTy == MVT::v4f32)
5892 NewTy = MVT::v4i32;
5893 else if (OpTy == MVT::v4f16 && HasFullFP16)
5894 NewTy = MVT::v4i16;
5895 else if (OpTy == MVT::v8f16 && HasFullFP16)
5896 NewTy = MVT::v8i16;
5897 else
5898 llvm_unreachable("Invalid type for custom lowering!");
5899
5900 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5901 return DAG.UnrollVectorOp(Op.getNode());
5902
5903 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5904 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5905}
5906
5907SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5908 EVT VT = Op.getValueType();
5909 if (VT.isVector())
5910 return LowerVectorFP_TO_INT(Op, DAG);
5911
5912 bool IsStrict = Op->isStrictFPOpcode();
5913 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5914
5915 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5916 RTLIB::Libcall LC;
5917 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5918 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5919 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5920 Op.getValueType());
5921 else
5922 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5923 Op.getValueType());
5924 SDLoc Loc(Op);
5925 MakeLibCallOptions CallOptions;
5926 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5927 SDValue Result;
5928 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5929 CallOptions, Loc, Chain);
5930 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5931 }
5932
5933 // FIXME: Remove this when we have strict fp instruction selection patterns
5934 if (IsStrict) {
5935 SDLoc Loc(Op);
5936 SDValue Result =
5937 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5938 : ISD::FP_TO_UINT,
5939 Loc, Op.getValueType(), SrcVal);
5940 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5941 }
5942
5943 return Op;
5944}
5945
5946 static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5947 const ARMSubtarget *Subtarget) {
5948 EVT VT = Op.getValueType();
5949 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5950 EVT FromVT = Op.getOperand(0).getValueType();
5951
5952 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5953 return Op;
5954 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5955 Subtarget->hasFP64())
5956 return Op;
5957 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5958 Subtarget->hasFullFP16())
5959 return Op;
5960 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5961 Subtarget->hasMVEFloatOps())
5962 return Op;
5963 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5964 Subtarget->hasMVEFloatOps())
5965 return Op;
5966
5967 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5968 return SDValue();
5969
5970 SDLoc DL(Op);
5971 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5972 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5973 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5974 DAG.getValueType(VT.getScalarType()));
5975 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5976 DAG.getConstant((1 << BW) - 1, DL, VT));
5977 if (IsSigned)
5978 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5979 DAG.getConstant(-(1 << BW), DL, VT));
5980 return Max;
5981}
5982
5983 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5984 EVT VT = Op.getValueType();
5985 SDLoc dl(Op);
5986
5987 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5988 if (VT.getVectorElementType() == MVT::f32)
5989 return Op;
5990 return DAG.UnrollVectorOp(Op.getNode());
5991 }
5992
5993 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5994 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5995 "Invalid type for custom lowering!");
5996
5997 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5998
5999 EVT DestVecType;
6000 if (VT == MVT::v4f32)
6001 DestVecType = MVT::v4i32;
6002 else if (VT == MVT::v4f16 && HasFullFP16)
6003 DestVecType = MVT::v4i16;
6004 else if (VT == MVT::v8f16 && HasFullFP16)
6005 DestVecType = MVT::v8i16;
6006 else
6007 return DAG.UnrollVectorOp(Op.getNode());
6008
6009 unsigned CastOpc;
6010 unsigned Opc;
6011 switch (Op.getOpcode()) {
6012 default: llvm_unreachable("Invalid opcode!");
6013 case ISD::SINT_TO_FP:
6014 CastOpc = ISD::SIGN_EXTEND;
6015 Opc = ISD::SINT_TO_FP;
6016 break;
6017 case ISD::UINT_TO_FP:
6018 CastOpc = ISD::ZERO_EXTEND;
6019 Opc = ISD::UINT_TO_FP;
6020 break;
6021 }
6022
6023 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
6024 return DAG.getNode(Opc, dl, VT, Op);
6025}
6026
6027SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
6028 EVT VT = Op.getValueType();
6029 if (VT.isVector())
6030 return LowerVectorINT_TO_FP(Op, DAG);
6031 if (isUnsupportedFloatingType(VT)) {
6032 RTLIB::Libcall LC;
6033 if (Op.getOpcode() == ISD::SINT_TO_FP)
6034 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
6035 Op.getValueType());
6036 else
6037 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6038 Op.getValueType());
6039 MakeLibCallOptions CallOptions;
6040 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6041 CallOptions, SDLoc(Op)).first;
6042 }
6043
6044 return Op;
6045}
6046
6047SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6048 // Implement fcopysign with a fabs and a conditional fneg.
6049 SDValue Tmp0 = Op.getOperand(0);
6050 SDValue Tmp1 = Op.getOperand(1);
6051 SDLoc dl(Op);
6052 EVT VT = Op.getValueType();
6053 EVT SrcVT = Tmp1.getValueType();
6054 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6055 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6056 bool UseNEON = !InGPR && Subtarget->hasNEON();
6057
6058 if (UseNEON) {
6059 // Use VBSL to copy the sign bit.
6060 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6061 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6062 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6063 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6064 if (VT == MVT::f64)
6065 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6066 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6067 DAG.getConstant(32, dl, MVT::i32));
6068 else /*if (VT == MVT::f32)*/
6069 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6070 if (SrcVT == MVT::f32) {
6071 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6072 if (VT == MVT::f64)
6073 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6074 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6075 DAG.getConstant(32, dl, MVT::i32));
6076 } else if (VT == MVT::f32)
6077 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6078 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6079 DAG.getConstant(32, dl, MVT::i32));
6080 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6081 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6082
6083 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6084 dl, MVT::i32);
6085 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6086 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6087 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6088
6089 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6090 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6091 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6092 if (VT == MVT::f32) {
6093 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6094 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6095 DAG.getConstant(0, dl, MVT::i32));
6096 } else {
6097 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6098 }
6099
6100 return Res;
6101 }
6102
6103 // Bitcast operand 1 to i32.
6104 if (SrcVT == MVT::f64)
6105 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6106 Tmp1).getValue(1);
6107 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6108
6109 // Or in the signbit with integer operations.
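// Illustration: copysign(1.0f, -2.0f) ORs the sign bit of -2.0f (0x80000000)
// into fabs(1.0f) = 0x3F800000, giving 0xBF800000, i.e. -1.0f.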
6110 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6111 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6112 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6113 if (VT == MVT::f32) {
6114 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6115 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6116 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6117 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6118 }
6119
6120 // f64: Or the high part with signbit and then combine two parts.
6121 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6122 Tmp0);
6123 SDValue Lo = Tmp0.getValue(0);
6124 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6125 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6126 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6127}
6128
6129SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6130 MachineFunction &MF = DAG.getMachineFunction();
6131 MachineFrameInfo &MFI = MF.getFrameInfo();
6132 MFI.setReturnAddressIsTaken(true);
6133
6134 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6135 return SDValue();
6136
6137 EVT VT = Op.getValueType();
6138 SDLoc dl(Op);
6139 unsigned Depth = Op.getConstantOperandVal(0);
6140 if (Depth) {
6141 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6142 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6143 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6144 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6145 MachinePointerInfo());
6146 }
6147
6148 // Return LR, which contains the return address. Mark it an implicit live-in.
6149 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6150 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6151}
6152
6153SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6154 const ARMBaseRegisterInfo &ARI =
6155 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6156 MachineFunction &MF = DAG.getMachineFunction();
6157 MachineFrameInfo &MFI = MF.getFrameInfo();
6158 MFI.setFrameAddressIsTaken(true);
6159
6160 EVT VT = Op.getValueType();
6161 SDLoc dl(Op); // FIXME probably not meaningful
6162 unsigned Depth = Op.getConstantOperandVal(0);
6163 Register FrameReg = ARI.getFrameRegister(MF);
6164 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6165 while (Depth--)
6166 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6167 MachinePointerInfo());
6168 return FrameAddr;
6169}
6170
6171// FIXME? Maybe this could be a TableGen attribute on some registers and
6172// this table could be generated automatically from RegInfo.
6173Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6174 const MachineFunction &MF) const {
6175 Register Reg = StringSwitch<unsigned>(RegName)
6176 .Case("sp", ARM::SP)
6177 .Default(0);
6178 if (Reg)
6179 return Reg;
6180 report_fatal_error(Twine("Invalid register name \""
6181 + StringRef(RegName) + "\"."));
6182}
6183
6184// Result is 64 bit value so split into two 32 bit values and return as a
6185// pair of values.
6186 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6187 SelectionDAG &DAG) {
6188 SDLoc DL(N);
6189
6190 // This function is only supposed to be called for i64 type destination.
6191 assert(N->getValueType(0) == MVT::i64
6192 && "ExpandREAD_REGISTER called for non-i64 type result.");
6193
6194 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6195 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6196 N->getOperand(0),
6197 N->getOperand(1));
6198
6199 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6200 Read.getValue(1)));
6201 Results.push_back(Read.getOperand(0));
6202}
6203
6204/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6205/// When \p DstVT, the destination type of \p BC, is on the vector
6206/// register bank and the source of bitcast, \p Op, operates on the same bank,
6207/// it might be possible to combine them, such that everything stays on the
6208/// vector register bank.
6209 /// \return The node that would replace \p BC, if the combine
6210/// is possible.
6211 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6212 SelectionDAG &DAG) {
6213 SDValue Op = BC->getOperand(0);
6214 EVT DstVT = BC->getValueType(0);
6215
6216 // The only vector instruction that can produce a scalar (remember,
6217 // since the bitcast was about to be turned into VMOVDRR, the source
6218 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6219 // Moreover, we can do this combine only if there is one use.
6220 // Finally, if the destination type is not a vector, there is not
6221 // much point in forcing everything onto the vector bank.
6222 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6223 !Op.hasOneUse())
6224 return SDValue();
6225
6226 // If the index is not constant, we will introduce an additional
6227 // multiply that will stick.
6228 // Give up in that case.
6229 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6230 if (!Index)
6231 return SDValue();
6232 unsigned DstNumElt = DstVT.getVectorNumElements();
6233
6234 // Compute the new index.
6235 const APInt &APIntIndex = Index->getAPIntValue();
6236 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6237 NewIndex *= APIntIndex;
6238 // Check if the new constant index fits into i32.
6239 if (NewIndex.getBitWidth() > 32)
6240 return SDValue();
6241
6242 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6243 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
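// For instance, under this combine v2f32 (bitcast (i64 extractelt v2i64 X, 1))
// can become v2f32 (extract_subvector (v4f32 bitcast X), 2).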
6244 SDLoc dl(Op);
6245 SDValue ExtractSrc = Op.getOperand(0);
6246 EVT VecVT = EVT::getVectorVT(
6247 *DAG.getContext(), DstVT.getScalarType(),
6248 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6249 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6250 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6251 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6252}
6253
6254/// ExpandBITCAST - If the target supports VFP, this function is called to
6255/// expand a bit convert where either the source or destination type is i64 to
6256/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6257/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6258/// vectors), since the legalizer won't know what to do with that.
6259SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6260 const ARMSubtarget *Subtarget) const {
6261 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6262 SDLoc dl(N);
6263 SDValue Op = N->getOperand(0);
6264
6265 // This function is only supposed to be called for i16 and i64 types, either
6266 // as the source or destination of the bit convert.
6267 EVT SrcVT = Op.getValueType();
6268 EVT DstVT = N->getValueType(0);
6269
6270 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6271 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6272 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6273 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6274
6275 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6276 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6277 return DAG.getNode(
6278 ISD::TRUNCATE, SDLoc(N), DstVT,
6279 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6280
6281 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6282 return SDValue();
6283
6284 // Turn i64->f64 into VMOVDRR.
6285 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6286 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6287 // if we can combine the bitcast with its source.
6288 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6289 return Val;
6290 SDValue Lo, Hi;
6291 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6292 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6293 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6294 }
6295
6296 // Turn f64->i64 into VMOVRRD.
6297 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6298 SDValue Cvt;
6299 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6300 SrcVT.getVectorNumElements() > 1)
6301 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6302 DAG.getVTList(MVT::i32, MVT::i32),
6303 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6304 else
6305 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6306 DAG.getVTList(MVT::i32, MVT::i32), Op);
6307 // Merge the pieces into a single i64 value.
6308 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6309 }
6310
6311 return SDValue();
6312}
6313
6314/// getZeroVector - Returns a vector of specified type with all zero elements.
6315/// Zero vectors are used to represent vector negation and in those cases
6316/// will be implemented with the NEON VNEG instruction. However, VNEG does
6317/// not support i64 elements, so sometimes the zero vectors will need to be
6318/// explicitly constructed. Regardless, use a canonical VMOV to create the
6319/// zero vector.
6320static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6321 assert(VT.isVector() && "Expected a vector type");
6322 // The canonical modified immediate encoding of a zero vector is....0!
6323 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6324 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6325 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6326 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6327}
6328
6329/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6330 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
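// Roughly, for a 64-bit value {Hi, Lo} shifted right by Amt the code below
// selects between the "small shift" result
//   Lo' = (Lo >> Amt) | (Hi << (32 - Amt)), Hi' = Hi >> Amt
// and, when Amt >= 32, the "big shift" result
//   Lo' = Hi >> (Amt - 32), Hi' = sign bits (SRA) or zero (SRL).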
6331SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6332 SelectionDAG &DAG) const {
6333 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6334 EVT VT = Op.getValueType();
6335 unsigned VTBits = VT.getSizeInBits();
6336 SDLoc dl(Op);
6337 SDValue ShOpLo = Op.getOperand(0);
6338 SDValue ShOpHi = Op.getOperand(1);
6339 SDValue ShAmt = Op.getOperand(2);
6340 SDValue ARMcc;
6341 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6342 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6343
6344 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6345
6346 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6347 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6348 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6349 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6350 DAG.getConstant(VTBits, dl, MVT::i32));
6351 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6352 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6353 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6354 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6355 ISD::SETGE, ARMcc, DAG, dl);
6356 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6357 ARMcc, CCR, CmpLo);
6358
6359 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6360 SDValue HiBigShift = Opc == ISD::SRA
6361 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6362 DAG.getConstant(VTBits - 1, dl, VT))
6363 : DAG.getConstant(0, dl, VT);
6364 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6365 ISD::SETGE, ARMcc, DAG, dl);
6366 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6367 ARMcc, CCR, CmpHi);
6368
6369 SDValue Ops[2] = { Lo, Hi };
6370 return DAG.getMergeValues(Ops, dl);
6371}
6372
6373/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6374 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6375SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6376 SelectionDAG &DAG) const {
6377 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6378 EVT VT = Op.getValueType();
6379 unsigned VTBits = VT.getSizeInBits();
6380 SDLoc dl(Op);
6381 SDValue ShOpLo = Op.getOperand(0);
6382 SDValue ShOpHi = Op.getOperand(1);
6383 SDValue ShAmt = Op.getOperand(2);
6384 SDValue ARMcc;
6385 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6386
6387 assert(Op.getOpcode() == ISD::SHL_PARTS);
6388 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6389 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6390 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6391 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6392 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6393
6394 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6395 DAG.getConstant(VTBits, dl, MVT::i32));
6396 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6397 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6398 ISD::SETGE, ARMcc, DAG, dl);
6399 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6400 ARMcc, CCR, CmpHi);
6401
6402 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6403 ISD::SETGE, ARMcc, DAG, dl);
6404 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6405 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6406 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6407
6408 SDValue Ops[2] = { Lo, Hi };
6409 return DAG.getMergeValues(Ops, dl);
6410}
6411
6412SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6413 SelectionDAG &DAG) const {
6414 // The rounding mode is in bits 23:22 of the FPSCR.
6415 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6416 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
6417 // so that the shift + and get folded into a bitfield extract.
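// For instance, if FPSCR[23:22] is 3 (round toward zero), the expression
// below yields ((3 + 1) & 3) = 0, which is FLT_ROUNDS' "toward zero" value.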
6418 SDLoc dl(Op);
6419 SDValue Chain = Op.getOperand(0);
6420 SDValue Ops[] = {Chain,
6421 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6422
6423 SDValue FPSCR =
6424 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6425 Chain = FPSCR.getValue(1);
6426 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6427 DAG.getConstant(1U << 22, dl, MVT::i32));
6428 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6429 DAG.getConstant(22, dl, MVT::i32));
6430 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6431 DAG.getConstant(3, dl, MVT::i32));
6432 return DAG.getMergeValues({And, Chain}, dl);
6433}
6434
6435SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6436 SelectionDAG &DAG) const {
6437 SDLoc DL(Op);
6438 SDValue Chain = Op->getOperand(0);
6439 SDValue RMValue = Op->getOperand(1);
6440
6441 // The rounding mode is in bits 23:22 of the FPSCR.
6442 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6443 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6444 // (((arg - 1) & 3) << 22).
6445 //
6446 // It is expected that the argument of llvm.set.rounding is within the
6447 // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is
6448 // the responsibility of the code that generates llvm.set.rounding to ensure
6449 // this condition.
6450
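// As a concrete example, llvm.set.rounding(0) (toward zero) becomes
// ((0 - 1) & 3) = 3, the ARM RZ encoding, which is then shifted into
// FPSCR[23:22] below.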
6451 // Calculate new value of FPSCR[23:22].
6452 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6453 DAG.getConstant(1, DL, MVT::i32));
6454 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6455 DAG.getConstant(0x3, DL, MVT::i32));
6456 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6457 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6458
6459 // Get current value of FPSCR.
6460 SDValue Ops[] = {Chain,
6461 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6462 SDValue FPSCR =
6463 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6464 Chain = FPSCR.getValue(1);
6465 FPSCR = FPSCR.getValue(0);
6466
6467 // Put new rounding mode into FPSCR[23:22].
6468 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6469 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6470 DAG.getConstant(RMMask, DL, MVT::i32));
6471 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6472 SDValue Ops2[] = {
6473 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6474 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6475}
6476
6477SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6478 SelectionDAG &DAG) const {
6479 SDLoc DL(Op);
6480 SDValue Chain = Op->getOperand(0);
6481 SDValue Mode = Op->getOperand(1);
6482
6483 // Generate nodes to build:
6484 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6485 SDValue Ops[] = {Chain,
6486 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6487 SDValue FPSCR =
6488 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6489 Chain = FPSCR.getValue(1);
6490 FPSCR = FPSCR.getValue(0);
6491
6492 SDValue FPSCRMasked =
6493 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6494 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6495 SDValue InputMasked =
6496 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6497 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6498 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6499
6500 SDValue Ops2[] = {
6501 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6502 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6503}
6504
6505SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6506 SelectionDAG &DAG) const {
6507 SDLoc DL(Op);
6508 SDValue Chain = Op->getOperand(0);
6509
6510 // To get the default FP mode all control bits are cleared:
6511 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6512 SDValue Ops[] = {Chain,
6513 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6514 SDValue FPSCR =
6515 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6516 Chain = FPSCR.getValue(1);
6517 FPSCR = FPSCR.getValue(0);
6518
6519 SDValue FPSCRMasked = DAG.getNode(
6520 ISD::AND, DL, MVT::i32, FPSCR,
6521 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6522 SDValue Ops2[] = {Chain,
6523 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6524 FPSCRMasked};
6525 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6526}
6527
6528 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6529 const ARMSubtarget *ST) {
6530 SDLoc dl(N);
6531 EVT VT = N->getValueType(0);
6532 if (VT.isVector() && ST->hasNEON()) {
6533
6534 // Compute the least significant set bit: LSB = X & -X
6535 SDValue X = N->getOperand(0);
6536 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6537 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6538
6539 EVT ElemTy = VT.getVectorElementType();
6540
6541 if (ElemTy == MVT::i8) {
6542 // Compute with: cttz(x) = ctpop(lsb - 1)
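// e.g. for x = 0b00011000: lsb = 0b00001000, lsb - 1 = 0b00000111,
// ctpop(lsb - 1) = 3 = cttz(x).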
6543 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6544 DAG.getTargetConstant(1, dl, ElemTy));
6545 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6546 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6547 }
6548
6549 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6550 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6551 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6552 unsigned NumBits = ElemTy.getSizeInBits();
6553 SDValue WidthMinus1 =
6554 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6555 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6556 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6557 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6558 }
6559
6560 // Compute with: cttz(x) = ctpop(lsb - 1)
6561
6562 // Compute LSB - 1.
6563 SDValue Bits;
6564 if (ElemTy == MVT::i64) {
6565 // Load constant 0xffff'ffff'ffff'ffff to register.
6566 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6567 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6568 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6569 } else {
6570 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6571 DAG.getTargetConstant(1, dl, ElemTy));
6572 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6573 }
6574 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6575 }
6576
6577 if (!ST->hasV6T2Ops())
6578 return SDValue();
6579
6580 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6581 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6582}
6583
6584 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6585 const ARMSubtarget *ST) {
6586 EVT VT = N->getValueType(0);
6587 SDLoc DL(N);
6588
6589 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6590 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6591 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6592 "Unexpected type for custom ctpop lowering");
6593
6594 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6595 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6596 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6597 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6598
6599 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
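// e.g. for VT == v4i32 the loop below goes v16i8 -> v8i16 -> v4i32, using
// vpaddlu to pairwise-accumulate the byte counts at each step.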
6600 unsigned EltSize = 8;
6601 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6602 while (EltSize != VT.getScalarSizeInBits()) {
6603 SmallVector<SDValue, 8> Ops;
6604 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6605 TLI.getPointerTy(DAG.getDataLayout())));
6606 Ops.push_back(Res);
6607
6608 EltSize *= 2;
6609 NumElts /= 2;
6610 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6611 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6612 }
6613
6614 return Res;
6615}
6616
6617 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6618/// operand of a vector shift operation, where all the elements of the
6619/// build_vector must have the same constant integer value.
6620static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6621 // Ignore bit_converts.
6622 while (Op.getOpcode() == ISD::BITCAST)
6623 Op = Op.getOperand(0);
6624 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6625 APInt SplatBits, SplatUndef;
6626 unsigned SplatBitSize;
6627 bool HasAnyUndefs;
6628 if (!BVN ||
6629 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6630 ElementBits) ||
6631 SplatBitSize > ElementBits)
6632 return false;
6633 Cnt = SplatBits.getSExtValue();
6634 return true;
6635}
6636
6637/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6638/// operand of a vector shift left operation. That value must be in the range:
6639/// 0 <= Value < ElementBits for a left shift; or
6640/// 0 <= Value <= ElementBits for a long left shift.
6641static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6642 assert(VT.isVector() && "vector shift count is not a vector type");
6643 int64_t ElementBits = VT.getScalarSizeInBits();
6644 if (!getVShiftImm(Op, ElementBits, Cnt))
6645 return false;
6646 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6647}
6648
6649/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6650/// operand of a vector shift right operation. For a shift opcode, the value
6651/// is positive, but for an intrinsic the value count must be negative. The
6652/// absolute value must be in the range:
6653/// 1 <= |Value| <= ElementBits for a right shift; or
6654/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6655static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6656 int64_t &Cnt) {
6657 assert(VT.isVector() && "vector shift count is not a vector type");
6658 int64_t ElementBits = VT.getScalarSizeInBits();
6659 if (!getVShiftImm(Op, ElementBits, Cnt))
6660 return false;
6661 if (!isIntrinsic)
6662 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6663 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6664 Cnt = -Cnt;
6665 return true;
6666 }
6667 return false;
6668}
6669
6670 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6671 const ARMSubtarget *ST) {
6672 EVT VT = N->getValueType(0);
6673 SDLoc dl(N);
6674 int64_t Cnt;
6675
6676 if (!VT.isVector())
6677 return SDValue();
6678
6679 // We essentially have two forms here. Shift by an immediate and shift by a
6680 // vector register (there are also shifts by a gpr, but those are just handled
6681 // with a tablegen pattern). We cannot easily match shift by an immediate in
6682 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6683 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6684 // signed or unsigned, and a negative shift indicates a shift right).
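// For example, an SRL by a non-constant vector amount is emitted below as a
// VSHLu whose per-lane shift amounts are the negated original amounts.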
6685 if (N->getOpcode() == ISD::SHL) {
6686 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6687 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6688 DAG.getConstant(Cnt, dl, MVT::i32));
6689 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6690 N->getOperand(1));
6691 }
6692
6693 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6694 "unexpected vector shift opcode");
6695
6696 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6697 unsigned VShiftOpc =
6698 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6699 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6700 DAG.getConstant(Cnt, dl, MVT::i32));
6701 }
6702
6703 // Other right shifts we don't have operations for (we use a shift left by a
6704 // negative number).
6705 EVT ShiftVT = N->getOperand(1).getValueType();
6706 SDValue NegatedCount = DAG.getNode(
6707 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6708 unsigned VShiftOpc =
6709 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6710 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6711}
6712
6713 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6714 const ARMSubtarget *ST) {
6715 EVT VT = N->getValueType(0);
6716 SDLoc dl(N);
6717
6718 // We can get here for a node like i32 = ISD::SHL i32, i64
6719 if (VT != MVT::i64)
6720 return SDValue();
6721
6722 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6723 N->getOpcode() == ISD::SHL) &&
6724 "Unknown shift to lower!");
6725
6726 unsigned ShOpc = N->getOpcode();
6727 if (ST->hasMVEIntegerOps()) {
6728 SDValue ShAmt = N->getOperand(1);
6729 unsigned ShPartsOpc = ARMISD::LSLL;
6730 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6731
6732 // If the shift amount is a constant that is zero or at least 32, or is a
6733 // non-constant wider than 64 bits, then do the default optimisation
6734 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6735 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6736 return SDValue();
6737
6738 // Extract the lower 32 bits of the shift amount if it's not an i32
6739 if (ShAmt->getValueType(0) != MVT::i32)
6740 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6741
6742 if (ShOpc == ISD::SRL) {
6743 if (!Con)
6744 // There is no t2LSRLr instruction so negate and perform an lsll if the
6745 // shift amount is in a register, emulating a right shift.
6746 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6747 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6748 else
6749 // Else generate an lsrl on the immediate shift amount
6750 ShPartsOpc = ARMISD::LSRL;
6751 } else if (ShOpc == ISD::SRA)
6752 ShPartsOpc = ARMISD::ASRL;
6753
6754 // Split Lower/Upper 32 bits of the destination/source
6755 SDValue Lo, Hi;
6756 std::tie(Lo, Hi) =
6757 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6758 // Generate the shift operation as computed above
6759 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6760 ShAmt);
6761 // The upper 32 bits come from the second return value of lsll
6762 Hi = SDValue(Lo.getNode(), 1);
6763 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6764 }
6765
6766 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6767 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6768 return SDValue();
6769
6770 // If we are in thumb mode, we don't have RRX.
6771 if (ST->isThumb1Only())
6772 return SDValue();
6773
6774 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6775 SDValue Lo, Hi;
6776 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6777
6778 // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
6779 // captures the result into a carry flag.
6780 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6781 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6782
6783 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6784 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6785
6786 // Merge the pieces into a single i64 value.
6787 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6788}
6789
6790 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6791 const ARMSubtarget *ST) {
6792 bool Invert = false;
6793 bool Swap = false;
6794 unsigned Opc = ARMCC::AL;
6795
6796 SDValue Op0 = Op.getOperand(0);
6797 SDValue Op1 = Op.getOperand(1);
6798 SDValue CC = Op.getOperand(2);
6799 EVT VT = Op.getValueType();
6800 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6801 SDLoc dl(Op);
6802
6803 EVT CmpVT;
6804 if (ST->hasNEON())
6805 CmpVT = VT.changeVectorElementTypeToInteger();
6806 else {
6807 assert(ST->hasMVEIntegerOps() &&
6808 "No hardware support for integer vector comparison!");
6809
6810 if (Op.getValueType().getVectorElementType() != MVT::i1)
6811 return SDValue();
6812
6813 // Make sure we expand floating point setcc to scalar if we do not have
6814 // mve.fp, so that we can handle them from there.
6815 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6816 return SDValue();
6817
6818 CmpVT = VT;
6819 }
6820
6821 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6822 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6823 // Special-case integer 64-bit equality comparisons. They aren't legal,
6824 // but they can be lowered with a few vector instructions.
6825 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6826 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6827 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6828 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6829 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6830 DAG.getCondCode(ISD::SETEQ));
6831 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6832 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6833 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6834 if (SetCCOpcode == ISD::SETNE)
6835 Merged = DAG.getNOT(dl, Merged, CmpVT);
6836 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6837 return Merged;
6838 }
6839
6840 if (CmpVT.getVectorElementType() == MVT::i64)
6841 // 64-bit comparisons are not legal in general.
6842 return SDValue();
6843
6844 if (Op1.getValueType().isFloatingPoint()) {
6845 switch (SetCCOpcode) {
6846 default: llvm_unreachable("Illegal FP comparison");
6847 case ISD::SETUNE:
6848 case ISD::SETNE:
6849 if (ST->hasMVEFloatOps()) {
6850 Opc = ARMCC::NE; break;
6851 } else {
6852 Invert = true; [[fallthrough]];
6853 }
6854 case ISD::SETOEQ:
6855 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6856 case ISD::SETOLT:
6857 case ISD::SETLT: Swap = true; [[fallthrough]];
6858 case ISD::SETOGT:
6859 case ISD::SETGT: Opc = ARMCC::GT; break;
6860 case ISD::SETOLE:
6861 case ISD::SETLE: Swap = true; [[fallthrough]];
6862 case ISD::SETOGE:
6863 case ISD::SETGE: Opc = ARMCC::GE; break;
6864 case ISD::SETUGE: Swap = true; [[fallthrough]];
6865 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6866 case ISD::SETUGT: Swap = true; [[fallthrough]];
6867 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6868 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6869 case ISD::SETONE: {
6870 // Expand this to (OLT | OGT).
6871 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6872 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6873 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6874 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6875 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6876 if (Invert)
6877 Result = DAG.getNOT(dl, Result, VT);
6878 return Result;
6879 }
6880 case ISD::SETUO: Invert = true; [[fallthrough]];
6881 case ISD::SETO: {
6882 // Expand this to (OLT | OGE).
6883 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6884 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6885 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6886 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6887 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6888 if (Invert)
6889 Result = DAG.getNOT(dl, Result, VT);
6890 return Result;
6891 }
6892 }
6893 } else {
6894 // Integer comparisons.
6895 switch (SetCCOpcode) {
6896 default: llvm_unreachable("Illegal integer comparison");
6897 case ISD::SETNE:
6898 if (ST->hasMVEIntegerOps()) {
6899 Opc = ARMCC::NE; break;
6900 } else {
6901 Invert = true; [[fallthrough]];
6902 }
6903 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6904 case ISD::SETLT: Swap = true; [[fallthrough]];
6905 case ISD::SETGT: Opc = ARMCC::GT; break;
6906 case ISD::SETLE: Swap = true; [[fallthrough]];
6907 case ISD::SETGE: Opc = ARMCC::GE; break;
6908 case ISD::SETULT: Swap = true; [[fallthrough]];
6909 case ISD::SETUGT: Opc = ARMCC::HI; break;
6910 case ISD::SETULE: Swap = true; [[fallthrough]];
6911 case ISD::SETUGE: Opc = ARMCC::HS; break;
6912 }
6913
6914 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6915 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6916 SDValue AndOp;
6917 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6918 AndOp = Op0;
6919 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6920 AndOp = Op1;
6921
6922 // Ignore bitconvert.
6923 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6924 AndOp = AndOp.getOperand(0);
6925
6926 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6927 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6928 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6929 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6930 if (!Invert)
6931 Result = DAG.getNOT(dl, Result, VT);
6932 return Result;
6933 }
6934 }
6935 }
6936
6937 if (Swap)
6938 std::swap(Op0, Op1);
6939
6940 // If one of the operands is a constant vector zero, attempt to fold the
6941 // comparison to a specialized compare-against-zero form.
6942 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6943 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6944 Opc == ARMCC::NE)) {
6945 if (Opc == ARMCC::GE)
6946 Opc = ARMCC::LE;
6947 else if (Opc == ARMCC::GT)
6948 Opc = ARMCC::LT;
6949 std::swap(Op0, Op1);
6950 }
6951
6952 SDValue Result;
6953 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6954 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6955 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6956 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6957 DAG.getConstant(Opc, dl, MVT::i32));
6958 else
6959 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6960 DAG.getConstant(Opc, dl, MVT::i32));
6961
6962 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6963
6964 if (Invert)
6965 Result = DAG.getNOT(dl, Result, VT);
6966
6967 return Result;
6968}
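
// Illustrative sketch (added; not part of the original file): a scalar model
// of the 64-bit SETEQ expansion above for a single lane. Each 64-bit element
// is compared as two 32-bit masks; ANDing a mask with its VREV64-swapped
// partner leaves all-ones only when both halves matched. Helper name is
// hypothetical.
static inline unsigned long long emulateLaneEq64(unsigned long long A,
                                                 unsigned long long B) {
  unsigned LoEq = (unsigned)A == (unsigned)B ? 0xffffffffu : 0;
  unsigned HiEq = (unsigned)(A >> 32) == (unsigned)(B >> 32) ? 0xffffffffu : 0;
  unsigned Lo = LoEq & HiEq; // AND with the swapped (VREV64) mask
  unsigned Hi = HiEq & LoEq;
  return ((unsigned long long)Hi << 32) | Lo; // all-ones iff A == B
}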
6969
6970static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6971 SDValue LHS = Op.getOperand(0);
6972 SDValue RHS = Op.getOperand(1);
6973 SDValue Carry = Op.getOperand(2);
6974 SDValue Cond = Op.getOperand(3);
6975 SDLoc DL(Op);
6976
6977 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6978
6979 // ARMISD::SUBE expects a carry, not a borrow like ISD::USUBO_CARRY, so we
6980 // have to invert the carry first.
6981 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6982 DAG.getConstant(1, DL, MVT::i32), Carry);
6983 // This converts the boolean value carry into the carry flag.
6984 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6985
6986 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6987 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6988
6989 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6990 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6991 SDValue ARMcc = DAG.getConstant(
6992 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6993 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6994 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6995 Cmp.getValue(1), SDValue());
6996 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6997 CCR, Chain.getValue(1));
6998}
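
// Illustrative sketch (added; not part of the original file): the flag
// convention assumed above. ISD::USUBO_CARRY supplies a borrow, while ARM's
// SBC-style subtraction consumes a carry with C = 1 - borrow, hence the
// "1 - Carry" inversion before the SUBE node is built. Helper names are
// hypothetical.
static inline unsigned borrowToARMCarry(unsigned Borrow) {
  return 1u - Borrow;                  // boolean borrow -> ARM carry flag
}
static inline unsigned emulateSBC(unsigned L, unsigned R, unsigned Carry) {
  return L - R - (1u - Carry);         // SBC subtracts NOT(carry) as the borrow
}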
6999
7000/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
7001/// valid vector constant for a NEON or MVE instruction with a "modified
7002/// immediate" operand (e.g., VMOV). If so, return the encoded value.
7003static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
7004 unsigned SplatBitSize, SelectionDAG &DAG,
7005 const SDLoc &dl, EVT &VT, EVT VectorVT,
7006 VMOVModImmType type) {
7007 unsigned OpCmode, Imm;
7008 bool is128Bits = VectorVT.is128BitVector();
7009
7010 // SplatBitSize is set to the smallest size that splats the vector, so a
7011 // zero vector will always have SplatBitSize == 8. However, NEON modified
7012 // immediate instructions other than VMOV do not support the 8-bit encoding
7013 // of a zero vector, and the default encoding of zero is supposed to be the
7014 // 32-bit version.
7015 if (SplatBits == 0)
7016 SplatBitSize = 32;
7017
7018 switch (SplatBitSize) {
7019 case 8:
7020 if (type != VMOVModImm)
7021 return SDValue();
7022 // Any 1-byte value is OK. Op=0, Cmode=1110.
7023 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
7024 OpCmode = 0xe;
7025 Imm = SplatBits;
7026 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
7027 break;
7028
7029 case 16:
7030 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
7031 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
7032 if ((SplatBits & ~0xff) == 0) {
7033 // Value = 0x00nn: Op=x, Cmode=100x.
7034 OpCmode = 0x8;
7035 Imm = SplatBits;
7036 break;
7037 }
7038 if ((SplatBits & ~0xff00) == 0) {
7039 // Value = 0xnn00: Op=x, Cmode=101x.
7040 OpCmode = 0xa;
7041 Imm = SplatBits >> 8;
7042 break;
7043 }
7044 return SDValue();
7045
7046 case 32:
7047 // NEON's 32-bit VMOV supports splat values where:
7048 // * only one byte is nonzero, or
7049 // * the least significant byte is 0xff and the second byte is nonzero, or
7050 // * the least significant 2 bytes are 0xff and the third is nonzero.
7051 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7052 if ((SplatBits & ~0xff) == 0) {
7053 // Value = 0x000000nn: Op=x, Cmode=000x.
7054 OpCmode = 0;
7055 Imm = SplatBits;
7056 break;
7057 }
7058 if ((SplatBits & ~0xff00) == 0) {
7059 // Value = 0x0000nn00: Op=x, Cmode=001x.
7060 OpCmode = 0x2;
7061 Imm = SplatBits >> 8;
7062 break;
7063 }
7064 if ((SplatBits & ~0xff0000) == 0) {
7065 // Value = 0x00nn0000: Op=x, Cmode=010x.
7066 OpCmode = 0x4;
7067 Imm = SplatBits >> 16;
7068 break;
7069 }
7070 if ((SplatBits & ~0xff000000) == 0) {
7071 // Value = 0xnn000000: Op=x, Cmode=011x.
7072 OpCmode = 0x6;
7073 Imm = SplatBits >> 24;
7074 break;
7075 }
7076
7077 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7078 if (type == OtherModImm) return SDValue();
7079
7080 if ((SplatBits & ~0xffff) == 0 &&
7081 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7082 // Value = 0x0000nnff: Op=x, Cmode=1100.
7083 OpCmode = 0xc;
7084 Imm = SplatBits >> 8;
7085 break;
7086 }
7087
7088 // cmode == 0b1101 is not supported for MVE VMVN
7089 if (type == MVEVMVNModImm)
7090 return SDValue();
7091
7092 if ((SplatBits & ~0xffffff) == 0 &&
7093 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7094 // Value = 0x00nnffff: Op=x, Cmode=1101.
7095 OpCmode = 0xd;
7096 Imm = SplatBits >> 16;
7097 break;
7098 }
7099
7100 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7101 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7102 // VMOV.I32. A (very) minor optimization would be to replicate the value
7103 // and fall through here to test for a valid 64-bit splat. But, then the
7104 // caller would also need to check and handle the change in size.
7105 return SDValue();
7106
7107 case 64: {
7108 if (type != VMOVModImm)
7109 return SDValue();
7110 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7111 uint64_t BitMask = 0xff;
7112 unsigned ImmMask = 1;
7113 Imm = 0;
7114 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7115 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7116 Imm |= ImmMask;
7117 } else if ((SplatBits & BitMask) != 0) {
7118 return SDValue();
7119 }
7120 BitMask <<= 8;
7121 ImmMask <<= 1;
7122 }
7123
7124 if (DAG.getDataLayout().isBigEndian()) {
7125 // Reverse the order of elements within the vector.
7126 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7127 unsigned Mask = (1 << BytesPerElem) - 1;
7128 unsigned NumElems = 8 / BytesPerElem;
7129 unsigned NewImm = 0;
7130 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7131 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7132 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7133 }
7134 Imm = NewImm;
7135 }
7136
7137 // Op=1, Cmode=1110.
7138 OpCmode = 0x1e;
7139 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7140 break;
7141 }
7142
7143 default:
7144 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7145 }
7146
7147 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7148 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7149}
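
// Illustrative sketch (added; not part of the original file): the shape of
// the 32-bit "single nonzero byte" encodings checked above (Cmode 000x-011x).
// This standalone helper reports which byte position would be encoded, or -1
// if one of the other cmode forms (or no form) is needed. Name is
// hypothetical.
static inline int vmovI32SingleBytePos(unsigned SplatBits) {
  for (int Byte = 0; Byte < 4; ++Byte)
    if ((SplatBits & ~(0xffu << (8 * Byte))) == 0)
      return Byte; // e.g. 0x0000ab00 -> 1 (Op=x, Cmode=001x, Imm=0xab)
  return -1;
}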
7150
7151SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7152 const ARMSubtarget *ST) const {
7153 EVT VT = Op.getValueType();
7154 bool IsDouble = (VT == MVT::f64);
7155 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7156 const APFloat &FPVal = CFP->getValueAPF();
7157
7158 // Prevent floating-point constants from using literal loads
7159 // when execute-only is enabled.
7160 if (ST->genExecuteOnly()) {
7161 // We shouldn't trigger this for v6m execute-only
7162 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7163 "Unexpected architecture");
7164
7165 // If we can represent the constant as an immediate, don't lower it
7166 if (isFPImmLegal(FPVal, VT))
7167 return Op;
7168 // Otherwise, construct as integer, and move to float register
7169 APInt INTVal = FPVal.bitcastToAPInt();
7170 SDLoc DL(CFP);
7171 switch (VT.getSimpleVT().SimpleTy) {
7172 default:
7173 llvm_unreachable("Unknown floating point type!");
7174 break;
7175 case MVT::f64: {
7176 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7177 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7178 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7179 }
7180 case MVT::f32:
7181 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7182 DAG.getConstant(INTVal, DL, MVT::i32));
7183 }
7184 }
7185
7186 if (!ST->hasVFP3Base())
7187 return SDValue();
7188
7189 // Use the default (constant pool) lowering for double constants when we have
7190 // an SP-only FPU
7191 if (IsDouble && !Subtarget->hasFP64())
7192 return SDValue();
7193
7194 // Try splatting with a VMOV.f32...
7195 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7196
7197 if (ImmVal != -1) {
7198 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7199 // We have code in place to select a valid ConstantFP already, no need to
7200 // do any mangling.
7201 return Op;
7202 }
7203
7204 // It's a float and we are trying to use NEON operations where
7205 // possible. Lower it to a splat followed by an extract.
7206 SDLoc DL(Op);
7207 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7208 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7209 NewVal);
7210 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7211 DAG.getConstant(0, DL, MVT::i32));
7212 }
7213
7214 // The rest of our options are NEON only, make sure that's allowed before
7215 // proceeding.
7216 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7217 return SDValue();
7218
7219 EVT VMovVT;
7220 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7221
7222 // It wouldn't really be worth bothering for doubles except for one very
7223 // important value, which does happen to match: 0.0. So make sure we don't do
7224 // anything stupid.
7225 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7226 return SDValue();
7227
7228 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7229 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7230 VMovVT, VT, VMOVModImm);
7231 if (NewVal != SDValue()) {
7232 SDLoc DL(Op);
7233 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7234 NewVal);
7235 if (IsDouble)
7236 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7237
7238 // It's a float: cast and extract a vector element.
7239 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7240 VecConstant);
7241 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7242 DAG.getConstant(0, DL, MVT::i32));
7243 }
7244
7245 // Finally, try a VMVN.i32
7246 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7247 VT, VMVNModImm);
7248 if (NewVal != SDValue()) {
7249 SDLoc DL(Op);
7250 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7251
7252 if (IsDouble)
7253 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7254
7255 // It's a float: cast and extract a vector element.
7256 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7257 VecConstant);
7258 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7259 DAG.getConstant(0, DL, MVT::i32));
7260 }
7261
7262 return SDValue();
7263}
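
// Illustrative sketch (added; not part of the original file, encoding
// assumed): the 8-bit VMOV floating-point immediates checked through
// ARM_AM::getFP32Imm/getFP64Imm cover values of the form
// +/- (16+m)/16 * 2^e with m in [0,15] and e in [-3,4]. This brute-force
// enumeration (hypothetical helper) shows which constants avoid a literal
// load on that path.
static inline bool looksLikeVMOVFPImm(double V) {
  for (int Sign = -1; Sign <= 1; Sign += 2)
    for (int M = 0; M <= 15; ++M)
      for (int E = -3; E <= 4; ++E) {
        double Mag = (16.0 + M) / 16.0;
        for (int i = 0; i < E; ++i) Mag *= 2.0;  // scale by 2^E without <cmath>
        for (int i = 0; i > E; --i) Mag /= 2.0;
        if (V == Sign * Mag)
          return true;
      }
  return false;
}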
7264
7265// Check if a VEXT instruction can handle the shuffle mask when the
7266// vector sources of the shuffle are the same.
7267static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7268 unsigned NumElts = VT.getVectorNumElements();
7269
7270 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7271 if (M[0] < 0)
7272 return false;
7273
7274 Imm = M[0];
7275
7276 // If this is a VEXT shuffle, the immediate value is the index of the first
7277 // element. The other shuffle indices must be the successive elements after
7278 // the first one.
7279 unsigned ExpectedElt = Imm;
7280 for (unsigned i = 1; i < NumElts; ++i) {
7281 // Increment the expected index. If it wraps around, just follow it
7282 // back to index zero and keep going.
7283 ++ExpectedElt;
7284 if (ExpectedElt == NumElts)
7285 ExpectedElt = 0;
7286
7287 if (M[i] < 0) continue; // ignore UNDEF indices
7288 if (ExpectedElt != static_cast<unsigned>(M[i]))
7289 return false;
7290 }
7291
7292 return true;
7293}
7294
7295static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7296 bool &ReverseVEXT, unsigned &Imm) {
7297 unsigned NumElts = VT.getVectorNumElements();
7298 ReverseVEXT = false;
7299
7300 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7301 if (M[0] < 0)
7302 return false;
7303
7304 Imm = M[0];
7305
7306 // If this is a VEXT shuffle, the immediate value is the index of the first
7307 // element. The other shuffle indices must be the successive elements after
7308 // the first one.
7309 unsigned ExpectedElt = Imm;
7310 for (unsigned i = 1; i < NumElts; ++i) {
7311 // Increment the expected index. If it wraps around, it may still be
7312 // a VEXT but the source vectors must be swapped.
7313 ExpectedElt += 1;
7314 if (ExpectedElt == NumElts * 2) {
7315 ExpectedElt = 0;
7316 ReverseVEXT = true;
7317 }
7318
7319 if (M[i] < 0) continue; // ignore UNDEF indices
7320 if (ExpectedElt != static_cast<unsigned>(M[i]))
7321 return false;
7322 }
7323
7324 // Adjust the index value if the source operands will be swapped.
7325 if (ReverseVEXT)
7326 Imm -= NumElts;
7327
7328 return true;
7329}
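
// Illustrative sketch (added; not part of the original file): the mask shape
// both VEXT checks above accept. A VEXT with immediate Imm selects NumElts
// consecutive elements starting at Imm from the concatenation V1:V2, where
// indices >= NumElts refer to V2; e.g. Imm=2 on a 4-element type gives
// {2,3,4,5}, and {6,7,0,1} is the wrapped (ReverseVEXT) case. Helper name is
// hypothetical.
static inline void makeVEXTMask(unsigned Imm, unsigned NumElts, int *Mask) {
  for (unsigned i = 0; i < NumElts; ++i)
    Mask[i] = (int)((Imm + i) % (2 * NumElts));
}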
7330
7331static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7332 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7333 // range, then 0 is placed into the resulting vector. So pretty much any mask
7334 // of 8 elements can work here.
7335 return VT == MVT::v8i8 && M.size() == 8;
7336}
7337
7338static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7339 unsigned Index) {
7340 if (Mask.size() == Elements * 2)
7341 return Index / Elements;
7342 return Mask[Index] == 0 ? 0 : 1;
7343}
7344
7345// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7346// checking that pairs of elements in the shuffle mask represent the same index
7347// in each vector, incrementing the expected index by 2 at each step.
7348// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7349// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7350// v2={e,f,g,h}
7351// WhichResult gives the offset for each element in the mask based on which
7352// of the two results it belongs to.
7353//
7354// The transpose can be represented either as:
7355// result1 = shufflevector v1, v2, result1_shuffle_mask
7356// result2 = shufflevector v1, v2, result2_shuffle_mask
7357// where v1/v2 and the shuffle masks have the same number of elements
7358// (here WhichResult (see below) indicates which result is being checked)
7359//
7360// or as:
7361// results = shufflevector v1, v2, shuffle_mask
7362// where both results are returned in one vector and the shuffle mask has twice
7363// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
7364// want to check the low half and high half of the shuffle mask as if it were
7365// the other case
7366static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7367 unsigned EltSz = VT.getScalarSizeInBits();
7368 if (EltSz == 64)
7369 return false;
7370
7371 unsigned NumElts = VT.getVectorNumElements();
7372 if (M.size() != NumElts && M.size() != NumElts*2)
7373 return false;
7374
7375 // If the mask is twice as long as the input vector then we need to check the
7376 // upper and lower parts of the mask with a matching value for WhichResult
7377 // FIXME: A mask with only even values will be rejected in case the first
7378 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7379 // M[0] is used to determine WhichResult
7380 for (unsigned i = 0; i < M.size(); i += NumElts) {
7381 WhichResult = SelectPairHalf(NumElts, M, i);
7382 for (unsigned j = 0; j < NumElts; j += 2) {
7383 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7384 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7385 return false;
7386 }
7387 }
7388
7389 if (M.size() == NumElts*2)
7390 WhichResult = 0;
7391
7392 return true;
7393}
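
// Illustrative sketch (added; not part of the original file): the canonical
// mask isVTRNMask accepts for one transpose result. For v4i32, WhichResult=0
// gives {0,4,2,6} and WhichResult=1 gives {1,5,3,7}. Helper name is
// hypothetical.
static inline void makeVTRNMask(unsigned NumElts, unsigned WhichResult,
                                int *Mask) {
  for (unsigned j = 0; j < NumElts; j += 2) {
    Mask[j] = (int)(j + WhichResult);
    Mask[j + 1] = (int)(j + NumElts + WhichResult);
  }
}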
7394
7395/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7396/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7397/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7398static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7399 unsigned EltSz = VT.getScalarSizeInBits();
7400 if (EltSz == 64)
7401 return false;
7402
7403 unsigned NumElts = VT.getVectorNumElements();
7404 if (M.size() != NumElts && M.size() != NumElts*2)
7405 return false;
7406
7407 for (unsigned i = 0; i < M.size(); i += NumElts) {
7408 WhichResult = SelectPairHalf(NumElts, M, i);
7409 for (unsigned j = 0; j < NumElts; j += 2) {
7410 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7411 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7412 return false;
7413 }
7414 }
7415
7416 if (M.size() == NumElts*2)
7417 WhichResult = 0;
7418
7419 return true;
7420}
7421
7422// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7423// that the mask elements are either all even and in steps of size 2 or all odd
7424// and in steps of size 2.
7425// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7426// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7427// v2={e,f,g,h}
7428// Requires similar checks to those of isVTRNMask with
7429// respect to how the results are returned.
7430static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7431 unsigned EltSz = VT.getScalarSizeInBits();
7432 if (EltSz == 64)
7433 return false;
7434
7435 unsigned NumElts = VT.getVectorNumElements();
7436 if (M.size() != NumElts && M.size() != NumElts*2)
7437 return false;
7438
7439 for (unsigned i = 0; i < M.size(); i += NumElts) {
7440 WhichResult = SelectPairHalf(NumElts, M, i);
7441 for (unsigned j = 0; j < NumElts; ++j) {
7442 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7443 return false;
7444 }
7445 }
7446
7447 if (M.size() == NumElts*2)
7448 WhichResult = 0;
7449
7450 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7451 if (VT.is64BitVector() && EltSz == 32)
7452 return false;
7453
7454 return true;
7455}
7456
7457/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7458/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7459/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
7460static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7461 unsigned EltSz = VT.getScalarSizeInBits();
7462 if (EltSz == 64)
7463 return false;
7464
7465 unsigned NumElts = VT.getVectorNumElements();
7466 if (M.size() != NumElts && M.size() != NumElts*2)
7467 return false;
7468
7469 unsigned Half = NumElts / 2;
7470 for (unsigned i = 0; i < M.size(); i += NumElts) {
7471 WhichResult = SelectPairHalf(NumElts, M, i);
7472 for (unsigned j = 0; j < NumElts; j += Half) {
7473 unsigned Idx = WhichResult;
7474 for (unsigned k = 0; k < Half; ++k) {
7475 int MIdx = M[i + j + k];
7476 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7477 return false;
7478 Idx += 2;
7479 }
7480 }
7481 }
7482
7483 if (M.size() == NumElts*2)
7484 WhichResult = 0;
7485
7486 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7487 if (VT.is64BitVector() && EltSz == 32)
7488 return false;
7489
7490 return true;
7491}
7492
7493// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7494// that pairs of elements of the shufflemask represent the same index in each
7495// vector incrementing sequentially through the vectors.
7496// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7497// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7498// v2={e,f,g,h}
7499// Requires similar checks to those of isVTRNMask with respect to how the
7500// results are returned.
7501static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7502 unsigned EltSz = VT.getScalarSizeInBits();
7503 if (EltSz == 64)
7504 return false;
7505
7506 unsigned NumElts = VT.getVectorNumElements();
7507 if (M.size() != NumElts && M.size() != NumElts*2)
7508 return false;
7509
7510 for (unsigned i = 0; i < M.size(); i += NumElts) {
7511 WhichResult = SelectPairHalf(NumElts, M, i);
7512 unsigned Idx = WhichResult * NumElts / 2;
7513 for (unsigned j = 0; j < NumElts; j += 2) {
7514 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7515 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7516 return false;
7517 Idx += 1;
7518 }
7519 }
7520
7521 if (M.size() == NumElts*2)
7522 WhichResult = 0;
7523
7524 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7525 if (VT.is64BitVector() && EltSz == 32)
7526 return false;
7527
7528 return true;
7529}
7530
7531/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7532/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7533/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7534static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7535 unsigned EltSz = VT.getScalarSizeInBits();
7536 if (EltSz == 64)
7537 return false;
7538
7539 unsigned NumElts = VT.getVectorNumElements();
7540 if (M.size() != NumElts && M.size() != NumElts*2)
7541 return false;
7542
7543 for (unsigned i = 0; i < M.size(); i += NumElts) {
7544 WhichResult = SelectPairHalf(NumElts, M, i);
7545 unsigned Idx = WhichResult * NumElts / 2;
7546 for (unsigned j = 0; j < NumElts; j += 2) {
7547 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7548 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7549 return false;
7550 Idx += 1;
7551 }
7552 }
7553
7554 if (M.size() == NumElts*2)
7555 WhichResult = 0;
7556
7557 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7558 if (VT.is64BitVector() && EltSz == 32)
7559 return false;
7560
7561 return true;
7562}
7563
7564/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7565/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7566static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7567 unsigned &WhichResult,
7568 bool &isV_UNDEF) {
7569 isV_UNDEF = false;
7570 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7571 return ARMISD::VTRN;
7572 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7573 return ARMISD::VUZP;
7574 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7575 return ARMISD::VZIP;
7576
7577 isV_UNDEF = true;
7578 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7579 return ARMISD::VTRN;
7580 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7581 return ARMISD::VUZP;
7582 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7583 return ARMISD::VZIP;
7584
7585 return 0;
7586}
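
// Illustrative sketch (added; not part of the original file): the canonical
// masks the VUZP and VZIP predicates above accept, to contrast with VTRN.
// For v4i32: VUZP gives {0,2,4,6} / {1,3,5,7} and VZIP gives {0,4,1,5} /
// {2,6,3,7}. Helper names are hypothetical.
static inline void makeVUZPMask(unsigned NumElts, unsigned WhichResult,
                                int *Mask) {
  for (unsigned j = 0; j < NumElts; ++j)
    Mask[j] = (int)(2 * j + WhichResult);
}
static inline void makeVZIPMask(unsigned NumElts, unsigned WhichResult,
                                int *Mask) {
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned j = 0; j < NumElts; j += 2) {
    Mask[j] = (int)Idx;
    Mask[j + 1] = (int)(Idx + NumElts);
    ++Idx;
  }
}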
7587
7588/// \return true if this is a reverse operation on a vector.
7589static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7590 unsigned NumElts = VT.getVectorNumElements();
7591 // Make sure the mask has the right size.
7592 if (NumElts != M.size())
7593 return false;
7594
7595 // Look for <15, ..., 3, -1, 1, 0>.
7596 for (unsigned i = 0; i != NumElts; ++i)
7597 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7598 return false;
7599
7600 return true;
7601}
7602
7603static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7604 unsigned NumElts = VT.getVectorNumElements();
7605 // Make sure the mask has the right size.
7606 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7607 return false;
7608
7609 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7610 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7611 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7612 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7613 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7614 int Ofs = Top ? 1 : 0;
7615 int Upper = SingleSource ? 0 : NumElts;
7616 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7617 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7618 return false;
7619 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7620 return false;
7621 }
7622 return true;
7623}
7624
7625static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7626 unsigned NumElts = VT.getVectorNumElements();
7627 // Make sure the mask has the right size.
7628 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7629 return false;
7630
7631 // If Top
7632 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7633 // This inserts Input2 into Input1
7634 // else if not Top
7635 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7636 // This inserts Input1 into Input2
7637 unsigned Offset = Top ? 0 : 1;
7638 unsigned N = SingleSource ? 0 : NumElts;
7639 for (unsigned i = 0; i < NumElts; i += 2) {
7640 if (M[i] >= 0 && M[i] != (int)i)
7641 return false;
7642 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7643 return false;
7644 }
7645
7646 return true;
7647}
7648
7649static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7650 unsigned NumElts = ToVT.getVectorNumElements();
7651 if (NumElts != M.size())
7652 return false;
7653
7654 // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7655 // looking for patterns of:
7656 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7657 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7658
7659 unsigned Off0 = rev ? NumElts / 2 : 0;
7660 unsigned Off1 = rev ? 0 : NumElts / 2;
7661 for (unsigned i = 0; i < NumElts; i += 2) {
7662 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7663 return false;
7664 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7665 return false;
7666 }
7667
7668 return true;
7669}
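
// Illustrative sketch (added; not part of the original file): the
// interleavings the VMOVN checks above look for, on a v8i16 result. The
// two-source isVMOVNMask(Top=true) form matches {0,8,2,10,4,12,6,14}, while
// the !rev truncating form matches {0,4,1,5,2,6,3,7}. Helper name is
// hypothetical.
static inline void makeVMOVNTruncMask(unsigned NumElts, bool Rev, int *Mask) {
  unsigned Off0 = Rev ? NumElts / 2 : 0;
  unsigned Off1 = Rev ? 0 : NumElts / 2;
  for (unsigned i = 0; i < NumElts; i += 2) {
    Mask[i] = (int)(Off0 + i / 2);
    Mask[i + 1] = (int)(Off1 + i / 2);
  }
}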
7670
7671// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7672// from a pair of inputs. For example:
7673// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7674// FP_ROUND(EXTRACT_ELT(Y, 0),
7675// FP_ROUND(EXTRACT_ELT(X, 1),
7676// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7677static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7678 const ARMSubtarget *ST) {
7679 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7680 if (!ST->hasMVEFloatOps())
7681 return SDValue();
7682
7683 SDLoc dl(BV);
7684 EVT VT = BV.getValueType();
7685 if (VT != MVT::v8f16)
7686 return SDValue();
7687
7688 // We are looking for a buildvector of fptrunc elements, where all the
7689 // elements are extracted alternately from two sources. Check the first two
7690 // items are valid enough and extract some info from them (they are checked
7691 // properly in the loop below).
7692 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7693 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7694 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7695 return SDValue();
7696 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7697 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7698 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7699 return SDValue();
7700 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7701 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7702 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7703 return SDValue();
7704
7705 // Check all the values in the BuildVector line up with our expectations.
7706 for (unsigned i = 1; i < 4; i++) {
7707 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7708 return Trunc.getOpcode() == ISD::FP_ROUND &&
7709 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7710 Trunc.getOperand(0).getOperand(0) == Op &&
7711 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7712 };
7713 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7714 return SDValue();
7715 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7716 return SDValue();
7717 }
7718
7719 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7720 DAG.getConstant(0, dl, MVT::i32));
7721 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7722 DAG.getConstant(1, dl, MVT::i32));
7723}
7724
7725// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7726// from a single input on alternating lanes. For example:
7727// BUILDVECTOR(FP_EXTEND(EXTRACT_ELT(X, 0)),
7728// FP_EXTEND(EXTRACT_ELT(X, 2)),
7729// FP_EXTEND(EXTRACT_ELT(X, 4)), ...)
7730static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7731 const ARMSubtarget *ST) {
7732 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7733 if (!ST->hasMVEFloatOps())
7734 return SDValue();
7735
7736 SDLoc dl(BV);
7737 EVT VT = BV.getValueType();
7738 if (VT != MVT::v4f32)
7739 return SDValue();
7740
7741 // We are looking for a buildvector of fpext elements, where all the
7742 // elements are alternating lanes from a single source. For example <0,2,4,6>
7743 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7744 // info from them (they are checked properly in the loop below).
7745 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7746 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7747 return SDValue();
7748 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7749 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7750 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7751 return SDValue();
7752
7753 // Check all the values in the BuildVector line up with our expectations.
7754 for (unsigned i = 1; i < 4; i++) {
7755 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7756 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7757 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7758 Trunc.getOperand(0).getOperand(0) == Op &&
7759 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7760 };
7761 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7762 return SDValue();
7763 }
7764
7765 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7766 DAG.getConstant(Offset, dl, MVT::i32));
7767}
7768
7769// If N is an integer constant that can be moved into a register in one
7770// instruction, return an SDValue of such a constant (will become a MOV
7771// instruction). Otherwise return null.
7772static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7773 const ARMSubtarget *ST, const SDLoc &dl) {
7774 uint64_t Val;
7775 if (!isa<ConstantSDNode>(N))
7776 return SDValue();
7777 Val = N->getAsZExtVal();
7778
7779 if (ST->isThumb1Only()) {
7780 if (Val <= 255 || ~Val <= 255)
7781 return DAG.getConstant(Val, dl, MVT::i32);
7782 } else {
7783 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7784 return DAG.getConstant(Val, dl, MVT::i32);
7785 }
7786 return SDValue();
7787}
7788
7789static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7790 const ARMSubtarget *ST) {
7791 SDLoc dl(Op);
7792 EVT VT = Op.getValueType();
7793
7794 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7795
7796 unsigned NumElts = VT.getVectorNumElements();
7797 unsigned BoolMask;
7798 unsigned BitsPerBool;
7799 if (NumElts == 2) {
7800 BitsPerBool = 8;
7801 BoolMask = 0xff;
7802 } else if (NumElts == 4) {
7803 BitsPerBool = 4;
7804 BoolMask = 0xf;
7805 } else if (NumElts == 8) {
7806 BitsPerBool = 2;
7807 BoolMask = 0x3;
7808 } else if (NumElts == 16) {
7809 BitsPerBool = 1;
7810 BoolMask = 0x1;
7811 } else
7812 return SDValue();
7813
7814 // If this is a single value copied into all lanes (a splat), we can just sign
7815 // extend that single value
7816 SDValue FirstOp = Op.getOperand(0);
7817 if (!isa<ConstantSDNode>(FirstOp) &&
7818 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7819 return U.get().isUndef() || U.get() == FirstOp;
7820 })) {
7821 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7822 DAG.getValueType(MVT::i1));
7823 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7824 }
7825
7826 // First create base with bits set where known
7827 unsigned Bits32 = 0;
7828 for (unsigned i = 0; i < NumElts; ++i) {
7829 SDValue V = Op.getOperand(i);
7830 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7831 continue;
7832 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7833 if (BitSet)
7834 Bits32 |= BoolMask << (i * BitsPerBool);
7835 }
7836
7837 // Add in unknown nodes
7838 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7839 DAG.getConstant(Bits32, dl, MVT::i32));
7840 for (unsigned i = 0; i < NumElts; ++i) {
7841 SDValue V = Op.getOperand(i);
7842 if (isa<ConstantSDNode>(V) || V.isUndef())
7843 continue;
7844 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7845 DAG.getConstant(i, dl, MVT::i32));
7846 }
7847
7848 return Base;
7849}
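
// Illustrative sketch (added; not part of the original file): how the
// constant lanes are packed above. Each of the NumElts booleans owns
// 16/NumElts bits of the 16-bit predicate value, so a v4i1 of {1,0,1,1}
// packs to 0xff0f. Helper name is hypothetical.
static inline unsigned packMVEPredicateBits(const bool *Lanes,
                                            unsigned NumElts) {
  unsigned BitsPerBool = 16 / NumElts;          // 8, 4, 2 or 1
  unsigned BoolMask = (1u << BitsPerBool) - 1;
  unsigned Bits = 0;
  for (unsigned i = 0; i < NumElts; ++i)
    if (Lanes[i])
      Bits |= BoolMask << (i * BitsPerBool);
  return Bits;
}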
7850
7851static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7852 const ARMSubtarget *ST) {
7853 if (!ST->hasMVEIntegerOps())
7854 return SDValue();
7855
7856 // We are looking for a buildvector where each element is Op[0] + i*N
7857 EVT VT = Op.getValueType();
7858 SDValue Op0 = Op.getOperand(0);
7859 unsigned NumElts = VT.getVectorNumElements();
7860
7861 // Get the increment value from operand 1
7862 SDValue Op1 = Op.getOperand(1);
7863 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7864 !isa<ConstantSDNode>(Op1.getOperand(1)))
7865 return SDValue();
7866 unsigned N = Op1.getConstantOperandVal(1);
7867 if (N != 1 && N != 2 && N != 4 && N != 8)
7868 return SDValue();
7869
7870 // Check that each other operand matches
7871 for (unsigned I = 2; I < NumElts; I++) {
7872 SDValue OpI = Op.getOperand(I);
7873 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7874 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7875 OpI.getConstantOperandVal(1) != I * N)
7876 return SDValue();
7877 }
7878
7879 SDLoc DL(Op);
7880 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7881 DAG.getConstant(N, DL, MVT::i32));
7882}
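
// Illustrative sketch (added; not part of the original file): the scalar
// sequence the VIDUP match above recognises. Lane i must equal Base + i*Step
// with Step one of 1, 2, 4 or 8, e.g. {x, x+4, x+8, x+12} becomes
// VIDUP(x, 4). Helper name is hypothetical.
static inline bool isVIDUPSequence(const unsigned *Lanes, unsigned NumElts,
                                   unsigned Step) {
  if (Step != 1 && Step != 2 && Step != 4 && Step != 8)
    return false;
  for (unsigned i = 1; i < NumElts; ++i)
    if (Lanes[i] != Lanes[0] + i * Step)
      return false;
  return true;
}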
7883
7884// Returns true if the operation N can be treated as qr instruction variant at
7885// operand Op.
7886static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7887 switch (N->getOpcode()) {
7888 case ISD::ADD:
7889 case ISD::MUL:
7890 case ISD::SADDSAT:
7891 case ISD::UADDSAT:
7892 return true;
7893 case ISD::SUB:
7894 case ISD::SSUBSAT:
7895 case ISD::USUBSAT:
7896 return N->getOperand(1).getNode() == Op;
7897 case ISD::INTRINSIC_WO_CHAIN:
7898 switch (N->getConstantOperandVal(0)) {
7899 case Intrinsic::arm_mve_add_predicated:
7900 case Intrinsic::arm_mve_mul_predicated:
7901 case Intrinsic::arm_mve_qadd_predicated:
7902 case Intrinsic::arm_mve_vhadd:
7903 case Intrinsic::arm_mve_hadd_predicated:
7904 case Intrinsic::arm_mve_vqdmulh:
7905 case Intrinsic::arm_mve_qdmulh_predicated:
7906 case Intrinsic::arm_mve_vqrdmulh:
7907 case Intrinsic::arm_mve_qrdmulh_predicated:
7908 case Intrinsic::arm_mve_vqdmull:
7909 case Intrinsic::arm_mve_vqdmull_predicated:
7910 return true;
7911 case Intrinsic::arm_mve_sub_predicated:
7912 case Intrinsic::arm_mve_qsub_predicated:
7913 case Intrinsic::arm_mve_vhsub:
7914 case Intrinsic::arm_mve_hsub_predicated:
7915 return N->getOperand(2).getNode() == Op;
7916 default:
7917 return false;
7918 }
7919 default:
7920 return false;
7921 }
7922}
7923
7924// If this is a case we can't handle, return null and let the default
7925// expansion code take care of it.
7926SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7927 const ARMSubtarget *ST) const {
7928 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7929 SDLoc dl(Op);
7930 EVT VT = Op.getValueType();
7931
7932 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7933 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7934
7935 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7936 return R;
7937
7938 APInt SplatBits, SplatUndef;
7939 unsigned SplatBitSize;
7940 bool HasAnyUndefs;
7941 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7942 if (SplatUndef.isAllOnes())
7943 return DAG.getUNDEF(VT);
7944
7945 // If all the users of this constant splat are qr instruction variants,
7946 // generate a vdup of the constant.
7947 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7948 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7949 all_of(BVN->uses(),
7950 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7951 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7952 : SplatBitSize == 16 ? MVT::v8i16
7953 : MVT::v16i8;
7954 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7955 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7956 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7957 }
7958
7959 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7960 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7961 // Check if an immediate VMOV works.
7962 EVT VmovVT;
7963 SDValue Val =
7964 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7965 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7966
7967 if (Val.getNode()) {
7968 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7969 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7970 }
7971
7972 // Try an immediate VMVN.
7973 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7974 Val = isVMOVModifiedImm(
7975 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7976 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7977 if (Val.getNode()) {
7978 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7979 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7980 }
7981
7982 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7983 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7984 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7985 if (ImmVal != -1) {
7986 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7987 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7988 }
7989 }
7990
7991 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7992 // type.
7993 if (ST->hasMVEIntegerOps() &&
7994 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7995 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7996 : SplatBitSize == 16 ? MVT::v8i16
7997 : MVT::v16i8;
7998 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7999 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
8000 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
8001 }
8002 }
8003 }
8004
8005 // Scan through the operands to see if only one value is used.
8006 //
8007 // As an optimisation, even if more than one value is used it may be more
8008 // profitable to splat with one value then change some lanes.
8009 //
8010 // Heuristically we decide to do this if the vector has a "dominant" value,
8011 // defined as splatted to more than half of the lanes.
8012 unsigned NumElts = VT.getVectorNumElements();
8013 bool isOnlyLowElement = true;
8014 bool usesOnlyOneValue = true;
8015 bool hasDominantValue = false;
8016 bool isConstant = true;
8017
8018 // Map of the number of times a particular SDValue appears in the
8019 // element list.
8020 DenseMap<SDValue, unsigned> ValueCounts;
8021 SDValue Value;
8022 for (unsigned i = 0; i < NumElts; ++i) {
8023 SDValue V = Op.getOperand(i);
8024 if (V.isUndef())
8025 continue;
8026 if (i > 0)
8027 isOnlyLowElement = false;
8028 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
8029 isConstant = false;
8030
8031 ValueCounts.insert(std::make_pair(V, 0));
8032 unsigned &Count = ValueCounts[V];
8033
8034 // Is this value dominant? (takes up more than half of the lanes)
8035 if (++Count > (NumElts / 2)) {
8036 hasDominantValue = true;
8037 Value = V;
8038 }
8039 }
8040 if (ValueCounts.size() != 1)
8041 usesOnlyOneValue = false;
8042 if (!Value.getNode() && !ValueCounts.empty())
8043 Value = ValueCounts.begin()->first;
8044
8045 if (ValueCounts.empty())
8046 return DAG.getUNDEF(VT);
8047
8048 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8049 // Keep going if we are hitting this case.
8050 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8051 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8052
8053 unsigned EltSize = VT.getScalarSizeInBits();
8054
8055 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8056 // i32 and try again.
8057 if (hasDominantValue && EltSize <= 32) {
8058 if (!isConstant) {
8059 SDValue N;
8060
8061 // If we are VDUPing a value that comes directly from a vector, that will
8062 // cause an unnecessary move to and from a GPR, where instead we could
8063 // just use VDUPLANE. We can only do this if the lane being extracted
8064 // is at a constant index, as the VDUP from lane instructions only have
8065 // constant-index forms.
8066 ConstantSDNode *constIndex;
8067 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8068 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8069 // We need to create a new undef vector to use for the VDUPLANE if the
8070 // size of the vector from which we get the value is different than the
8071 // size of the vector that we need to create. We will insert the element
8072 // such that the register coalescer will remove unnecessary copies.
8073 if (VT != Value->getOperand(0).getValueType()) {
8074 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8075 VT.getVectorNumElements();
8076 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8077 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8078 Value, DAG.getConstant(index, dl, MVT::i32)),
8079 DAG.getConstant(index, dl, MVT::i32));
8080 } else
8081 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8082 Value->getOperand(0), Value->getOperand(1));
8083 } else
8084 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8085
8086 if (!usesOnlyOneValue) {
8087 // The dominant value was splatted as 'N', but we now have to insert
8088 // all differing elements.
8089 for (unsigned I = 0; I < NumElts; ++I) {
8090 if (Op.getOperand(I) == Value)
8091 continue;
8092 SmallVector<SDValue, 3> Ops;
8093 Ops.push_back(N);
8094 Ops.push_back(Op.getOperand(I));
8095 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8096 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8097 }
8098 }
8099 return N;
8100 }
8101 if (VT.getVectorElementType().isFloatingPoint()) {
8102 SmallVector<SDValue, 8> Ops;
8103 MVT FVT = VT.getVectorElementType().getSimpleVT();
8104 assert(FVT == MVT::f32 || FVT == MVT::f16);
8105 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8106 for (unsigned i = 0; i < NumElts; ++i)
8107 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8108 Op.getOperand(i)));
8109 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8110 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8111 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8112 if (Val.getNode())
8113 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8114 }
8115 if (usesOnlyOneValue) {
8116 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8117 if (isConstant && Val.getNode())
8118 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8119 }
8120 }
8121
8122 // If all elements are constants and the case above didn't get hit, fall back
8123 // to the default expansion, which will generate a load from the constant
8124 // pool.
8125 if (isConstant)
8126 return SDValue();
8127
8128 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8129 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8130 // length <= 2.
8131 if (NumElts >= 4)
8132 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8133 return shuffle;
8134
8135 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8136 // VCVT's
8137 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8138 return VCVT;
8139 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8140 return VCVT;
8141
8142 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8143 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8144 // into two 64-bit vectors; we might discover a better way to lower it.
8145 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8146 EVT ExtVT = VT.getVectorElementType();
8147 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8148 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8149 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8150 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8151 SDValue Upper =
8152 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8153 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8154 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8155 if (Lower && Upper)
8156 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8157 }
8158
8159 // Vectors with 32- or 64-bit elements can be built by directly assigning
8160 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8161 // will be legalized.
8162 if (EltSize >= 32) {
8163 // Do the expansion with floating-point types, since that is what the VFP
8164 // registers are defined to use, and since i64 is not legal.
8165 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8166 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8167 SmallVector<SDValue, 8> Ops;
8168 for (unsigned i = 0; i < NumElts; ++i)
8169 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8170 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8171 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8172 }
8173
8174 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8175 // know the default expansion would otherwise fall back on something even
8176 // worse. For a vector with one or two non-undef values, that's
8177 // scalar_to_vector for the elements followed by a shuffle (provided the
8178 // shuffle is valid for the target) and materialization element by element
8179 // on the stack followed by a load for everything else.
8180 if (!isConstant && !usesOnlyOneValue) {
8181 SDValue Vec = DAG.getUNDEF(VT);
8182 for (unsigned i = 0 ; i < NumElts; ++i) {
8183 SDValue V = Op.getOperand(i);
8184 if (V.isUndef())
8185 continue;
8186 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8187 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8188 }
8189 return Vec;
8190 }
8191
8192 return SDValue();
8193}
8194
8195// Gather data to see if the operation can be modelled as a
8196// shuffle in combination with VEXTs.
8197SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8198 SelectionDAG &DAG) const {
8199 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8200 SDLoc dl(Op);
8201 EVT VT = Op.getValueType();
8202 unsigned NumElts = VT.getVectorNumElements();
8203
8204 struct ShuffleSourceInfo {
8205 SDValue Vec;
8206 unsigned MinElt = std::numeric_limits<unsigned>::max();
8207 unsigned MaxElt = 0;
8208
8209 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8210 // be compatible with the shuffle we intend to construct. As a result
8211 // ShuffleVec will be some sliding window into the original Vec.
8212 SDValue ShuffleVec;
8213
8214 // Code should guarantee that element i in Vec starts at element "WindowBase
8215 // + i * WindowScale in ShuffleVec".
8216 int WindowBase = 0;
8217 int WindowScale = 1;
8218
8219 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8220
8221 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8222 };
8223
8224 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8225 // node.
8226 SmallVector<ShuffleSourceInfo, 2> Sources;
8227 for (unsigned i = 0; i < NumElts; ++i) {
8228 SDValue V = Op.getOperand(i);
8229 if (V.isUndef())
8230 continue;
8231 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8232 // A shuffle can only come from building a vector from various
8233 // elements of other vectors.
8234 return SDValue();
8235 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8236 // Furthermore, shuffles require a constant mask, whereas extractelts
8237 // accept variable indices.
8238 return SDValue();
8239 }
8240
8241 // Add this element source to the list if it's not already there.
8242 SDValue SourceVec = V.getOperand(0);
8243 auto Source = llvm::find(Sources, SourceVec);
8244 if (Source == Sources.end())
8245 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8246
8247 // Update the minimum and maximum lane number seen.
8248 unsigned EltNo = V.getConstantOperandVal(1);
8249 Source->MinElt = std::min(Source->MinElt, EltNo);
8250 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8251 }
8252
8253 // Currently only do something sane when at most two source vectors
8254 // are involved.
8255 if (Sources.size() > 2)
8256 return SDValue();
8257
8258 // Find out the smallest element size among result and two sources, and use
8259 // it as element size to build the shuffle_vector.
8260 EVT SmallestEltTy = VT.getVectorElementType();
8261 for (auto &Source : Sources) {
8262 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8263 if (SrcEltTy.bitsLT(SmallestEltTy))
8264 SmallestEltTy = SrcEltTy;
8265 }
8266 unsigned ResMultiplier =
8267 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8268 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8269 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8270
8271 // If the source vector is too wide or too narrow, we may nevertheless be able
8272 // to construct a compatible shuffle either by concatenating it with UNDEF or
8273 // extracting a suitable range of elements.
8274 for (auto &Src : Sources) {
8275 EVT SrcVT = Src.ShuffleVec.getValueType();
8276
8277 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8278 uint64_t VTSize = VT.getFixedSizeInBits();
8279 if (SrcVTSize == VTSize)
8280 continue;
8281
8282 // This stage of the search produces a source with the same element type as
8283 // the original, but with a total width matching the BUILD_VECTOR output.
8284 EVT EltVT = SrcVT.getVectorElementType();
8285 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8286 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8287
8288 if (SrcVTSize < VTSize) {
8289 if (2 * SrcVTSize != VTSize)
8290 return SDValue();
8291 // We can pad out the smaller vector for free, so if it's part of a
8292 // shuffle...
8293 Src.ShuffleVec =
8294 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8295 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8296 continue;
8297 }
8298
8299 if (SrcVTSize != 2 * VTSize)
8300 return SDValue();
8301
8302 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8303 // Span too large for a VEXT to cope
8304 return SDValue();
8305 }
8306
8307 if (Src.MinElt >= NumSrcElts) {
8308 // The extraction can just take the second half
8309 Src.ShuffleVec =
8310 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8311 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8312 Src.WindowBase = -NumSrcElts;
8313 } else if (Src.MaxElt < NumSrcElts) {
8314 // The extraction can just take the first half
8315 Src.ShuffleVec =
8316 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8317 DAG.getConstant(0, dl, MVT::i32));
8318 } else {
8319 // An actual VEXT is needed
8320 SDValue VEXTSrc1 =
8321 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8322 DAG.getConstant(0, dl, MVT::i32));
8323 SDValue VEXTSrc2 =
8324 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8325 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8326
8327 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8328 VEXTSrc2,
8329 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8330 Src.WindowBase = -Src.MinElt;
8331 }
8332 }
8333
8334 // Another possible incompatibility occurs from the vector element types. We
8335 // can fix this by bitcasting the source vectors to the same type we intend
8336 // for the shuffle.
8337 for (auto &Src : Sources) {
8338 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8339 if (SrcEltTy == SmallestEltTy)
8340 continue;
8341 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8342 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8343 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8344 Src.WindowBase *= Src.WindowScale;
8345 }
8346
8347 // Final check before we try to actually produce a shuffle.
8348 LLVM_DEBUG(for (auto Src
8349 : Sources)
8350 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8351
8352 // The stars all align, our next step is to produce the mask for the shuffle.
8353 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8354 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8355 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8356 SDValue Entry = Op.getOperand(i);
8357 if (Entry.isUndef())
8358 continue;
8359
8360 auto Src = llvm::find(Sources, Entry.getOperand(0));
8361 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8362
8363 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8364 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8365 // segment.
8366 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8367 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8368 VT.getScalarSizeInBits());
8369 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8370
8371 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8372 // starting at the appropriate offset.
8373 int *LaneMask = &Mask[i * ResMultiplier];
8374
8375 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8376 ExtractBase += NumElts * (Src - Sources.begin());
8377 for (int j = 0; j < LanesDefined; ++j)
8378 LaneMask[j] = ExtractBase + j;
8379 }
8380
8381
8382 // We can't handle more than two sources. This should have already
8383 // been checked before this point.
8384 assert(Sources.size() <= 2 && "Too many sources!");
8385
8386 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8387 for (unsigned i = 0; i < Sources.size(); ++i)
8388 ShuffleOps[i] = Sources[i].ShuffleVec;
8389
8390 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8391 ShuffleOps[1], Mask, DAG);
8392 if (!Shuffle)
8393 return SDValue();
8394 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8395}
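// As a concrete illustration of the mask construction above, assume two
// same-sized v4i32 sources A and B that need no padding or VEXT (so
// WindowScale == 1 and WindowBase == 0 for both). Then
//   (build_vector A[2], A[3], B[0], B[1])
// yields ShuffleOps = { A, B } and Mask = <2, 3, 4, 5>, which
// buildLegalVectorShuffle can emit as a single two-operand shuffle. The
// general case additionally rescales lane indices via WindowScale when the
// source element types differ.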
8396
8397enum ShuffleOpCodes {
8398 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8399 OP_VREV,
8400 OP_VDUP0,
8401 OP_VDUP1,
8402 OP_VDUP2,
8403 OP_VDUP3,
8404 OP_VEXT1,
8405 OP_VEXT2,
8406 OP_VEXT3,
8407 OP_VUZPL, // VUZP, left result
8408 OP_VUZPR, // VUZP, right result
8409 OP_VZIPL, // VZIP, left result
8410 OP_VZIPR, // VZIP, right result
8411 OP_VTRNL, // VTRN, left result
8412 OP_VTRNR // VTRN, right result
8413};
8414
8415static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8416 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8417 switch (OpNum) {
8418 case OP_COPY:
8419 case OP_VREV:
8420 case OP_VDUP0:
8421 case OP_VDUP1:
8422 case OP_VDUP2:
8423 case OP_VDUP3:
8424 return true;
8425 }
8426 return false;
8427}
8428
8429/// isShuffleMaskLegal - Targets can use this to indicate that they only
8430/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8431/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8432/// are assumed to be legal.
8433bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8434 if (VT.getVectorNumElements() == 4 &&
8435 (VT.is128BitVector() || VT.is64BitVector())) {
8436 unsigned PFIndexes[4];
8437 for (unsigned i = 0; i != 4; ++i) {
8438 if (M[i] < 0)
8439 PFIndexes[i] = 8;
8440 else
8441 PFIndexes[i] = M[i];
8442 }
8443
8444 // Compute the index in the perfect shuffle table.
8445 unsigned PFTableIndex =
8446 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8447 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8448 unsigned Cost = (PFEntry >> 30);
8449
8450 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8451 return true;
8452 }
8453
8454 bool ReverseVEXT, isV_UNDEF;
8455 unsigned Imm, WhichResult;
8456
8457 unsigned EltSize = VT.getScalarSizeInBits();
8458 if (EltSize >= 32 ||
8459 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
8460 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8461 isVREVMask(M, VT, 64) ||
8462 isVREVMask(M, VT, 32) ||
8463 isVREVMask(M, VT, 16))
8464 return true;
8465 else if (Subtarget->hasNEON() &&
8466 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8467 isVTBLMask(M, VT) ||
8468 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8469 return true;
8470 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8471 isReverseMask(M, VT))
8472 return true;
8473 else if (Subtarget->hasMVEIntegerOps() &&
8474 (isVMOVNMask(M, VT, true, false) ||
8475 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8476 return true;
8477 else if (Subtarget->hasMVEIntegerOps() &&
8478 (isTruncMask(M, VT, false, false) ||
8479 isTruncMask(M, VT, false, true) ||
8480 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8481 return true;
8482 else
8483 return false;
8484}
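// For example, the 4-lane mask <u,u,u,3> maps undef lanes to 8, giving
// PFIndexes = {8, 8, 8, 3} and
//   PFTableIndex = 8*9*9*9 + 8*9*9 + 8*9 + 3 = 6555.
// The top two bits of PerfectShuffleTable[6555] hold the cost that is
// compared against 4 above.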
8485
8486/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8487/// the specified operations to build the shuffle.
8488static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8489 SDValue RHS, SelectionDAG &DAG,
8490 const SDLoc &dl) {
8491 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8492 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8493 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8494
8495 if (OpNum == OP_COPY) {
8496 if (LHSID == (1*9+2)*9+3) return LHS;
8497 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8498 return RHS;
8499 }
8500
8501 SDValue OpLHS, OpRHS;
8502 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8503 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8504 EVT VT = OpLHS.getValueType();
8505
8506 switch (OpNum) {
8507 default: llvm_unreachable("Unknown shuffle opcode!");
8508 case OP_VREV:
8509 // VREV divides the vector in half and swaps within the half.
8510 if (VT.getScalarSizeInBits() == 32)
8511 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8512 // vrev <4 x i16> -> VREV32
8513 if (VT.getScalarSizeInBits() == 16)
8514 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8515 // vrev <4 x i8> -> VREV16
8516 assert(VT.getScalarSizeInBits() == 8);
8517 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8518 case OP_VDUP0:
8519 case OP_VDUP1:
8520 case OP_VDUP2:
8521 case OP_VDUP3:
8522 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8523 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8524 case OP_VEXT1:
8525 case OP_VEXT2:
8526 case OP_VEXT3:
8527 return DAG.getNode(ARMISD::VEXT, dl, VT,
8528 OpLHS, OpRHS,
8529 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8530 case OP_VUZPL:
8531 case OP_VUZPR:
8532 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8533 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8534 case OP_VZIPL:
8535 case OP_VZIPR:
8536 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8537 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8538 case OP_VTRNL:
8539 case OP_VTRNR:
8540 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8541 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8542 }
8543}
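// For reference, a PFEntry packs four fields, decoded above as:
//   unsigned Cost  = (PFEntry >> 30);               // cost field, 0..3
//   unsigned OpNum = (PFEntry >> 26) & 0x0F;        // one of the OP_* codes
//   unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
//   unsigned RHSID = (PFEntry >>  0) & ((1 << 13) - 1);
// LHSID/RHSID are themselves base-9 encodings of 4-lane masks (8 == undef),
// so the OP_COPY check against (1*9+2)*9+3 above recognises the identity
// mask <0,1,2,3> and simply returns LHS.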
8544
8545static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8546 ArrayRef<int> ShuffleMask,
8547 SelectionDAG &DAG) {
8548 // Check to see if we can use the VTBL instruction.
8549 SDValue V1 = Op.getOperand(0);
8550 SDValue V2 = Op.getOperand(1);
8551 SDLoc DL(Op);
8552
8553 SmallVector<SDValue, 8> VTBLMask;
8554 for (int I : ShuffleMask)
8555 VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32));
8556
8557 if (V2.getNode()->isUndef())
8558 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8559 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8560
8561 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8562 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8563}
8564
8565static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8566 SDLoc DL(Op);
8567 EVT VT = Op.getValueType();
8568
8569 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8570 "Expect a v8i16/v8f16/v16i8 type");
8571 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8572 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8573 // extract the first 8 bytes into the top double word and the last 8 bytes
8574 // into the bottom double word, through a new vector shuffle that will be
8575 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8576 std::vector<int> NewMask;
8577 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8578 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8579 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8580 NewMask.push_back(i);
8581 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8582}
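// Worked example for v8i16: VREV64 turns <0,1,2,3,4,5,6,7> into
// <3,2,1,0,7,6,5,4>, and the half-swapping mask <4,5,6,7,0,1,2,3> applied to
// that intermediate produces <7,6,5,4,3,2,1,0>, i.e. the fully reversed
// vector.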
8583
8584static EVT getVectorTyFromPredicateVector(EVT VT) {
8585 switch (VT.getSimpleVT().SimpleTy) {
8586 case MVT::v2i1:
8587 return MVT::v2f64;
8588 case MVT::v4i1:
8589 return MVT::v4i32;
8590 case MVT::v8i1:
8591 return MVT::v8i16;
8592 case MVT::v16i1:
8593 return MVT::v16i8;
8594 default:
8595 llvm_unreachable("Unexpected vector predicate type");
8596 }
8597}
8598
8599static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8600 SelectionDAG &DAG) {
8601 // Converting from boolean predicates to integers involves creating a vector
8602 // of all ones or all zeroes and selecting the lanes based upon the real
8603 // predicate.
8604 SDValue AllOnes =
8605 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8606 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8607
8608 SDValue AllZeroes =
8609 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8610 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8611
8612 // Get full vector type from predicate type
8614
8615 SDValue RecastV1;
8616 // If the real predicate is a v8i1 or v4i1 (not v16i1) then we need to recast
8617 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8618 // sizes are not the same. We have to use an MVE-specific PREDICATE_CAST node,
8619 // since we know in hardware the sizes are really the same.
8620 if (VT != MVT::v16i1)
8621 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8622 else
8623 RecastV1 = Pred;
8624
8625 // Select either all ones or zeroes depending upon the real predicate bits.
8626 SDValue PredAsVector =
8627 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8628
8629 // Recast our new predicate-as-integer v16i8 vector into something
8630 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8631 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8632}
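// Roughly, a v4i1 predicate <1,0,1,1> comes back from this routine as the
// v4i32 value <0xffffffff, 0x0, 0xffffffff, 0xffffffff>: each original
// predicate lane selects four all-ones or all-zero bytes in the v16i8
// select, and the final bitcast reassembles those bytes into full-width
// integer lanes.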
8633
8634static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8635 const ARMSubtarget *ST) {
8636 EVT VT = Op.getValueType();
8637 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8638 ArrayRef<int> ShuffleMask = SVN->getMask();
8639
8640 assert(ST->hasMVEIntegerOps() &&
8641 "No support for vector shuffle of boolean predicates");
8642
8643 SDValue V1 = Op.getOperand(0);
8644 SDValue V2 = Op.getOperand(1);
8645 SDLoc dl(Op);
8646 if (isReverseMask(ShuffleMask, VT)) {
8647 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8648 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8649 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8650 DAG.getConstant(16, dl, MVT::i32));
8651 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8652 }
8653
8654 // Until we can come up with optimised cases for every single vector
8655 // shuffle in existence we have chosen the least painful strategy. This is
8656 // to essentially promote the boolean predicate to an 8-bit integer, where
8657 // each predicate represents a byte. Then we fall back on a normal integer
8658 // vector shuffle and convert the result back into a predicate vector. In
8659 // many cases the generated code might be even better than scalar code
8660 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8661 // fields in a register into 8 other arbitrary 2-bit fields!
8662 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8663 EVT NewVT = PredAsVector1.getValueType();
8664 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8665 : PromoteMVEPredVector(dl, V2, VT, DAG);
8666 assert(PredAsVector2.getValueType() == NewVT &&
8667 "Expected identical vector type in expanded i1 shuffle!");
8668
8669 // Do the shuffle!
8670 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8671 PredAsVector2, ShuffleMask);
8672
8673 // Now return the result of comparing the shuffled vector with zero,
8674 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8675 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8676 if (VT == MVT::v2i1) {
8677 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8678 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8679 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8680 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8681 }
8682 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8683 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8684}
8685
8686static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8687 ArrayRef<int> ShuffleMask,
8688 SelectionDAG &DAG) {
8689 // Attempt to lower the vector shuffle using as many whole register movs as
8690 // possible. This is useful for types smaller than 32 bits, which would
8691 // otherwise often become a series of GPR movs.
8692 SDLoc dl(Op);
8693 EVT VT = Op.getValueType();
8694 if (VT.getScalarSizeInBits() >= 32)
8695 return SDValue();
8696
8697 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8698 "Unexpected vector type");
8699 int NumElts = VT.getVectorNumElements();
8700 int QuarterSize = NumElts / 4;
8701 // The four final parts of the vector, as i32's
8702 SDValue Parts[4];
8703
8704 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8705 // <u,u,u,u>), returning the vmov lane index
8706 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8707 // Detect which mov lane this would be from the first non-undef element.
8708 int MovIdx = -1;
8709 for (int i = 0; i < Length; i++) {
8710 if (ShuffleMask[Start + i] >= 0) {
8711 if (ShuffleMask[Start + i] % Length != i)
8712 return -1;
8713 MovIdx = ShuffleMask[Start + i] / Length;
8714 break;
8715 }
8716 }
8717 // If all items are undef, leave this for other combines
8718 if (MovIdx == -1)
8719 return -1;
8720 // Check the remaining values are the correct part of the same mov
8721 for (int i = 1; i < Length; i++) {
8722 if (ShuffleMask[Start + i] >= 0 &&
8723 (ShuffleMask[Start + i] / Length != MovIdx ||
8724 ShuffleMask[Start + i] % Length != i))
8725 return -1;
8726 }
8727 return MovIdx;
8728 };
8729
8730 for (int Part = 0; Part < 4; ++Part) {
8731 // Does this part look like a mov
8732 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8733 if (Elt != -1) {
8734 SDValue Input = Op->getOperand(0);
8735 if (Elt >= 4) {
8736 Input = Op->getOperand(1);
8737 Elt -= 4;
8738 }
8739 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8740 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8741 DAG.getConstant(Elt, dl, MVT::i32));
8742 }
8743 }
8744
8745 // Nothing interesting found, just return
8746 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8747 return SDValue();
8748
8749 // The other parts need to be built with the old shuffle vector, cast to a
8750 // v4i32 and extract_vector_elts
8751 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8752 SmallVector<int, 16> NewShuffleMask;
8753 for (int Part = 0; Part < 4; ++Part)
8754 for (int i = 0; i < QuarterSize; i++)
8755 NewShuffleMask.push_back(
8756 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8757 SDValue NewShuffle = DAG.getVectorShuffle(
8758 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8759 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8760
8761 for (int Part = 0; Part < 4; ++Part)
8762 if (!Parts[Part])
8763 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8764 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8765 }
8766 // Build a vector out of the various parts and bitcast it back to the original
8767 // type.
8768 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8769 return DAG.getBitcast(VT, NewVec);
8770}
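// For example, the v8i16 mask <0,1,2,3,12,13,10,11> is handled entirely
// here: with QuarterSize == 2, getMovIdx returns 0 and 1 for the first two
// quarters (lanes 0 and 1 of operand 0) and 6 and 5 for the last two (lanes
// 2 and 1 of operand 1), so the result is a single v4f32
// ARMISD::BUILD_VECTOR of four 32-bit lane extracts, bitcast back to v8i16.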
8771
8772static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8773 ArrayRef<int> ShuffleMask,
8774 SelectionDAG &DAG) {
8775 SDValue V1 = Op.getOperand(0);
8776 SDValue V2 = Op.getOperand(1);
8777 EVT VT = Op.getValueType();
8778 unsigned NumElts = VT.getVectorNumElements();
8779
8780 // A one-off identity mask is one that is mostly an identity mask from a
8781 // single source but contains a single element out-of-place, either from a
8782 // different vector or from another position in the same vector. As opposed to
8783 // lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8784 // pair directly.
8785 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8786 int &OffElement) {
8787 OffElement = -1;
8788 int NonUndef = 0;
8789 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8790 if (Mask[i] == -1)
8791 continue;
8792 NonUndef++;
8793 if (Mask[i] != i + BaseOffset) {
8794 if (OffElement == -1)
8795 OffElement = i;
8796 else
8797 return false;
8798 }
8799 }
8800 return NonUndef > 2 && OffElement != -1;
8801 };
8802 int OffElement;
8803 SDValue VInput;
8804 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8805 VInput = V1;
8806 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8807 VInput = V2;
8808 else
8809 return SDValue();
8810
8811 SDLoc dl(Op);
8812 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8813 ? MVT::i32
8814 : VT.getScalarType();
8815 SDValue Elt = DAG.getNode(
8816 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8817 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8818 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8819 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8820 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8821}
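// For example, with VT == v4i32 the mask <0,1,6,3> is a one-off identity of
// V1: only lane 2 is out of place and comes from element 2 of V2, so the
// code above emits
//   (insert_vector_elt V1, (extract_vector_elt V2, 2), 2)
// instead of a full shuffle.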
8822
8823static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8824 const ARMSubtarget *ST) {
8825 SDValue V1 = Op.getOperand(0);
8826 SDValue V2 = Op.getOperand(1);
8827 SDLoc dl(Op);
8828 EVT VT = Op.getValueType();
8829 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8830 unsigned EltSize = VT.getScalarSizeInBits();
8831
8832 if (ST->hasMVEIntegerOps() && EltSize == 1)
8833 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8834
8835 // Convert shuffles that are directly supported on NEON to target-specific
8836 // DAG nodes, instead of keeping them as shuffles and matching them again
8837 // during code selection. This is more efficient and avoids the possibility
8838 // of inconsistencies between legalization and selection.
8839 // FIXME: floating-point vectors should be canonicalized to integer vectors
8840 // of the same size so that they get CSEd properly.
8841 ArrayRef<int> ShuffleMask = SVN->getMask();
8842
8843 if (EltSize <= 32) {
8844 if (SVN->isSplat()) {
8845 int Lane = SVN->getSplatIndex();
8846 // If this is undef splat, generate it via "just" vdup, if possible.
8847 if (Lane == -1) Lane = 0;
8848
8849 // Test if V1 is a SCALAR_TO_VECTOR.
8850 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8851 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8852 }
8853 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8854 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8855 // reaches it).
8856 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8857 !isa<ConstantSDNode>(V1.getOperand(0))) {
8858 bool IsScalarToVector = true;
8859 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8860 if (!V1.getOperand(i).isUndef()) {
8861 IsScalarToVector = false;
8862 break;
8863 }
8864 if (IsScalarToVector)
8865 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8866 }
8867 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8868 DAG.getConstant(Lane, dl, MVT::i32));
8869 }
8870
8871 bool ReverseVEXT = false;
8872 unsigned Imm = 0;
8873 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8874 if (ReverseVEXT)
8875 std::swap(V1, V2);
8876 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8877 DAG.getConstant(Imm, dl, MVT::i32));
8878 }
8879
8880 if (isVREVMask(ShuffleMask, VT, 64))
8881 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8882 if (isVREVMask(ShuffleMask, VT, 32))
8883 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8884 if (isVREVMask(ShuffleMask, VT, 16))
8885 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8886
8887 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8888 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8889 DAG.getConstant(Imm, dl, MVT::i32));
8890 }
8891
8892 // Check for Neon shuffles that modify both input vectors in place.
8893 // If both results are used, i.e., if there are two shuffles with the same
8894 // source operands and with masks corresponding to both results of one of
8895 // these operations, DAG memoization will ensure that a single node is
8896 // used for both shuffles.
8897 unsigned WhichResult = 0;
8898 bool isV_UNDEF = false;
8899 if (ST->hasNEON()) {
8900 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8901 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8902 if (isV_UNDEF)
8903 V2 = V1;
8904 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8905 .getValue(WhichResult);
8906 }
8907 }
8908 if (ST->hasMVEIntegerOps()) {
8909 if (isVMOVNMask(ShuffleMask, VT, false, false))
8910 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8911 DAG.getConstant(0, dl, MVT::i32));
8912 if (isVMOVNMask(ShuffleMask, VT, true, false))
8913 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8914 DAG.getConstant(1, dl, MVT::i32));
8915 if (isVMOVNMask(ShuffleMask, VT, true, true))
8916 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8917 DAG.getConstant(1, dl, MVT::i32));
8918 }
8919
8920 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8921 // shuffles that produce a result larger than their operands with:
8922 // shuffle(concat(v1, undef), concat(v2, undef))
8923 // ->
8924 // shuffle(concat(v1, v2), undef)
8925 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8926 //
8927 // This is useful in the general case, but there are special cases where
8928 // native shuffles produce larger results: the two-result ops.
8929 //
8930 // Look through the concat when lowering them:
8931 // shuffle(concat(v1, v2), undef)
8932 // ->
8933 // concat(VZIP(v1, v2):0, :1)
8934 //
8935 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8936 SDValue SubV1 = V1->getOperand(0);
8937 SDValue SubV2 = V1->getOperand(1);
8938 EVT SubVT = SubV1.getValueType();
8939
8940 // We expect these to have been canonicalized to -1.
8941 assert(llvm::all_of(ShuffleMask, [&](int i) {
8942 return i < (int)VT.getVectorNumElements();
8943 }) && "Unexpected shuffle index into UNDEF operand!");
8944
8945 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8946 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8947 if (isV_UNDEF)
8948 SubV2 = SubV1;
8949 assert((WhichResult == 0) &&
8950 "In-place shuffle of concat can only have one result!");
8951 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8952 SubV1, SubV2);
8953 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8954 Res.getValue(1));
8955 }
8956 }
8957 }
8958
8959 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8960 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8961 return V;
8962
8963 for (bool Top : {false, true}) {
8964 for (bool SingleSource : {false, true}) {
8965 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8966 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8967 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8968 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8969 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8970 SingleSource ? V1 : V2);
8971 if (Top) {
8972 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8973 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8974 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8975 }
8976 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8977 }
8978 }
8979 }
8980 }
8981
8982 // If the shuffle is not directly supported and it has 4 elements, use
8983 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8984 unsigned NumElts = VT.getVectorNumElements();
8985 if (NumElts == 4) {
8986 unsigned PFIndexes[4];
8987 for (unsigned i = 0; i != 4; ++i) {
8988 if (ShuffleMask[i] < 0)
8989 PFIndexes[i] = 8;
8990 else
8991 PFIndexes[i] = ShuffleMask[i];
8992 }
8993
8994 // Compute the index in the perfect shuffle table.
8995 unsigned PFTableIndex =
8996 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8997 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8998 unsigned Cost = (PFEntry >> 30);
8999
9000 if (Cost <= 4) {
9001 if (ST->hasNEON())
9002 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9003 else if (isLegalMVEShuffleOp(PFEntry)) {
9004 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9005 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9006 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
9007 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
9008 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
9009 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9010 }
9011 }
9012 }
9013
9014 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
9015 if (EltSize >= 32) {
9016 // Do the expansion with floating-point types, since that is what the VFP
9017 // registers are defined to use, and since i64 is not legal.
9018 EVT EltVT = EVT::getFloatingPointVT(EltSize);
9019 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
9020 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
9021 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
9023 for (unsigned i = 0; i < NumElts; ++i) {
9024 if (ShuffleMask[i] < 0)
9025 Ops.push_back(DAG.getUNDEF(EltVT));
9026 else
9027 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
9028 ShuffleMask[i] < (int)NumElts ? V1 : V2,
9029 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
9030 dl, MVT::i32)));
9031 }
9032 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
9033 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
9034 }
9035
9036 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9037 isReverseMask(ShuffleMask, VT))
9038 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9039
9040 if (ST->hasNEON() && VT == MVT::v8i8)
9041 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9042 return NewOp;
9043
9044 if (ST->hasMVEIntegerOps())
9045 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9046 return NewOp;
9047
9048 return SDValue();
9049}
9050
9051static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9052 const ARMSubtarget *ST) {
9053 EVT VecVT = Op.getOperand(0).getValueType();
9054 SDLoc dl(Op);
9055
9056 assert(ST->hasMVEIntegerOps() &&
9057 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9058
9059 SDValue Conv =
9060 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9061 unsigned Lane = Op.getConstantOperandVal(2);
9062 unsigned LaneWidth =
9063 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9064 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9065 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9066 Op.getOperand(1), DAG.getValueType(MVT::i1));
9067 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9068 DAG.getConstant(~Mask, dl, MVT::i32));
9069 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9070}
9071
9072SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9073 SelectionDAG &DAG) const {
9074 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9075 SDValue Lane = Op.getOperand(2);
9076 if (!isa<ConstantSDNode>(Lane))
9077 return SDValue();
9078
9079 SDValue Elt = Op.getOperand(1);
9080 EVT EltVT = Elt.getValueType();
9081
9082 if (Subtarget->hasMVEIntegerOps() &&
9083 Op.getValueType().getScalarSizeInBits() == 1)
9084 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9085
9086 if (getTypeAction(*DAG.getContext(), EltVT) ==
9087 TargetLowering::TypePromoteFloat) {
9088 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9089 // but the type system will try to do that if we don't intervene.
9090 // Reinterpret any such vector-element insertion as one with the
9091 // corresponding integer types.
9092
9093 SDLoc dl(Op);
9094
9095 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9096 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9097 TargetLowering::TypePromoteFloat);
9098
9099 SDValue VecIn = Op.getOperand(0);
9100 EVT VecVT = VecIn.getValueType();
9101 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9102 VecVT.getVectorNumElements());
9103
9104 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9105 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9106 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9107 IVecIn, IElt, Lane);
9108 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9109 }
9110
9111 return Op;
9112}
9113
9114static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9115 const ARMSubtarget *ST) {
9116 EVT VecVT = Op.getOperand(0).getValueType();
9117 SDLoc dl(Op);
9118
9119 assert(ST->hasMVEIntegerOps() &&
9120 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9121
9122 SDValue Conv =
9123 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9124 unsigned Lane = Op.getConstantOperandVal(1);
9125 unsigned LaneWidth =
9126 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9127 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9128 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9129 return Shift;
9130}
9131
9132static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9133 const ARMSubtarget *ST) {
9134 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9135 SDValue Lane = Op.getOperand(1);
9136 if (!isa<ConstantSDNode>(Lane))
9137 return SDValue();
9138
9139 SDValue Vec = Op.getOperand(0);
9140 EVT VT = Vec.getValueType();
9141
9142 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9143 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9144
9145 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9146 SDLoc dl(Op);
9147 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9148 }
9149
9150 return Op;
9151}
9152
9153static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9154 const ARMSubtarget *ST) {
9155 SDLoc dl(Op);
9156 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9157 "Unexpected custom CONCAT_VECTORS lowering");
9159 "Unexpected custom CONCAT_VECTORS lowering");
9160 assert(ST->hasMVEIntegerOps() &&
9161 "CONCAT_VECTORS lowering only supported for MVE");
9162
9163 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9164 EVT Op1VT = V1.getValueType();
9165 EVT Op2VT = V2.getValueType();
9166 assert(Op1VT == Op2VT && "Operand types don't match!");
9167 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9168 "Unexpected i1 concat operations!");
9169 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9170
9171 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9172 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9173
9174 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9175 // promoted to v8i16, etc.
9176 MVT ElType =
9177 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9178 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9179
9180 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9181 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9182 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9183 // ConcatVT.
9184 SDValue ConVec =
9185 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9186 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9187 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9188 }
9189
9190 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9191 // to be the right size for the destination. For example, if Op1 is v4i1
9192 // then the promoted vector is v4i32. The result of concatenation gives a
9193 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9194 // needs truncating to i16 and inserting in the result.
9195 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9196 EVT NewVT = NewV.getValueType();
9197 EVT ConcatVT = ConVec.getValueType();
9198 unsigned ExtScale = 1;
9199 if (NewVT == MVT::v2f64) {
9200 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9201 ExtScale = 2;
9202 }
9203 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9204 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9205 DAG.getIntPtrConstant(i * ExtScale, dl));
9206 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9207 DAG.getConstant(j, dl, MVT::i32));
9208 }
9209 return ConVec;
9210 };
9211 unsigned j = 0;
9212 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9213 ConVec = ExtractInto(NewV1, ConVec, j);
9214 ConVec = ExtractInto(NewV2, ConVec, j);
9215
9216 // Now return the result of comparing the subvector with zero, which will
9217 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9218 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9219 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9220 };
9221
9222 // Concat each pair of subvectors and pack into the lower half of the array.
9223 SmallVector<SDValue> ConcatOps(Op->ops());
9224 while (ConcatOps.size() > 1) {
9225 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9226 SDValue V1 = ConcatOps[I];
9227 SDValue V2 = ConcatOps[I + 1];
9228 ConcatOps[I / 2] = ConcatPair(V1, V2);
9229 }
9230 ConcatOps.resize(ConcatOps.size() / 2);
9231 }
9232 return ConcatOps[0];
9233}
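// The loop above halves the operand list each round. For instance, a
// CONCAT_VECTORS of four v4i1 operands a, b, c, d becomes
//   round 1: { concat(a,b) : v8i1, concat(c,d) : v8i1 }
//   round 2: { concat(concat(a,b), concat(c,d)) : v16i1 }
// and the single remaining value is returned.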
9234
9235static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9236 const ARMSubtarget *ST) {
9237 EVT VT = Op->getValueType(0);
9238 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9239 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9240
9241 // The only time a CONCAT_VECTORS operation can have legal types is when
9242 // two 64-bit vectors are concatenated to a 128-bit vector.
9243 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9244 "unexpected CONCAT_VECTORS");
9245 SDLoc dl(Op);
9246 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9247 SDValue Op0 = Op.getOperand(0);
9248 SDValue Op1 = Op.getOperand(1);
9249 if (!Op0.isUndef())
9250 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9251 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9252 DAG.getIntPtrConstant(0, dl));
9253 if (!Op1.isUndef())
9254 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9255 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9256 DAG.getIntPtrConstant(1, dl));
9257 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9258}
9259
9260static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9261 const ARMSubtarget *ST) {
9262 SDValue V1 = Op.getOperand(0);
9263 SDValue V2 = Op.getOperand(1);
9264 SDLoc dl(Op);
9265 EVT VT = Op.getValueType();
9266 EVT Op1VT = V1.getValueType();
9267 unsigned NumElts = VT.getVectorNumElements();
9268 unsigned Index = V2->getAsZExtVal();
9269
9270 assert(VT.getScalarSizeInBits() == 1 &&
9271 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9272 assert(ST->hasMVEIntegerOps() &&
9273 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9274
9275 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9276
9277 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9278 // promoted to v8i16, etc.
9279
9280 MVT ElType =
9281 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9282 if (NumElts == 2) {
9283 EVT SubVT = MVT::v4i32;
9284 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9285 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9286 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9287 DAG.getIntPtrConstant(i, dl));
9288 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9289 DAG.getConstant(j, dl, MVT::i32));
9290 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9291 DAG.getConstant(j + 1, dl, MVT::i32));
9292 }
9293 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9294 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9295 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9296 }
9297
9298 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9299 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9300 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9301 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9302 DAG.getIntPtrConstant(i, dl));
9303 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9304 DAG.getConstant(j, dl, MVT::i32));
9305 }
9306
9307 // Now return the result of comparing the subvector with zero,
9308 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9309 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9310 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9311}
9312
9313// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9314static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9315 const ARMSubtarget *ST) {
9316 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9317 EVT VT = N->getValueType(0);
9318 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9319 "Expected a vector i1 type!");
9320 SDValue Op = N->getOperand(0);
9321 EVT FromVT = Op.getValueType();
9322 SDLoc DL(N);
9323
9324 SDValue And =
9325 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9326 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9327 DAG.getCondCode(ISD::SETNE));
9328}
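// For example, (trunc v4i32 %x to v4i1) becomes
//   (setcc ne (and %x, (splat 1)), (splat 0))
// which tests the low bit of each lane and yields a genuine v4i1 predicate.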
9329
9330static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9331 const ARMSubtarget *Subtarget) {
9332 if (!Subtarget->hasMVEIntegerOps())
9333 return SDValue();
9334
9335 EVT ToVT = N->getValueType(0);
9336 if (ToVT.getScalarType() == MVT::i1)
9337 return LowerTruncatei1(N, DAG, Subtarget);
9338
9339 // MVE does not have a single instruction to perform the truncation of a v4i32
9340 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9341 // Most of the instructions in MVE follow the 'Beats' system, where moving
9342 // values from different lanes is usually something that the instructions
9343 // avoid.
9344 //
9345 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9346 // which take the top/bottom half of a larger lane and extend it (or do the
9347 // opposite, truncating into the top/bottom lane from a larger lane). Note
9348 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9349 // bottom 16bits from each vector lane. This works really well with T/B
9350 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9351 // to change order.
9352 //
9353 // But truncates and sext/zext are always going to be fairly common from llvm.
9354 // We have several options for how to deal with them:
9355 // - Wherever possible combine them into an instruction that makes them
9356 // "free". This includes loads/stores, which can perform the trunc as part
9357 // of the memory operation. Or certain shuffles that can be turned into
9358 // VMOVN/VMOVL.
9359 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9360 // trunc(mul(sext(a), sext(b))) may become
9361 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9362 // this case can use VMULL). This is performed in the
9363 // MVELaneInterleavingPass.
9364 // - Otherwise we have an option. By default we would expand the
9365 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9366 // registers. One for each vector lane in the vector. This can obviously be
9367 // very expensive.
9368 // - The other option is to use the fact that loads/stores can extend/truncate
9369 // to turn a trunc into two truncating stack stores and a stack reload. This
9370 // becomes 3 back-to-back memory operations, but at least that is less than
9371 // all the insert/extracts.
9372 //
9373 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9374 // are either optimized where they can be, or eventually lowered into stack
9375 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9376 // too early, where other instructions would be better, and stops us from
9377 // having to reconstruct multiple buildvector shuffles into loads/stores.
9378 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9379 return SDValue();
9380 EVT FromVT = N->getOperand(0).getValueType();
9381 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9382 return SDValue();
9383
9384 SDValue Lo, Hi;
9385 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9386 SDLoc DL(N);
9387 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9388}
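// For instance, (trunc v8i32 %x to v8i16) is split into its two v4i32
// halves and becomes (ARMISD::MVETRUNC v8i16 %lo, %hi); as described above,
// later combines either fold the MVETRUNC into neighbouring operations where
// possible or eventually expand it via truncating stack stores and a reload.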
9389
9390static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9391 const ARMSubtarget *Subtarget) {
9392 if (!Subtarget->hasMVEIntegerOps())
9393 return SDValue();
9394
9395 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9396
9397 EVT ToVT = N->getValueType(0);
9398 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9399 return SDValue();
9400 SDValue Op = N->getOperand(0);
9401 EVT FromVT = Op.getValueType();
9402 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9403 return SDValue();
9404
9405 SDLoc DL(N);
9406 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9407 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9408 ExtVT = MVT::v8i16;
9409
9410 unsigned Opcode =
9411 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9412 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9413 SDValue Ext1 = Ext.getValue(1);
9414
9415 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9416 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9417 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9418 }
9419
9420 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9421}
9422
9423/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9424/// element has been zero/sign-extended, depending on the isSigned parameter,
9425/// from an integer type half its size.
9426static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9427 bool isSigned) {
9428 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9429 EVT VT = N->getValueType(0);
9430 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9431 SDNode *BVN = N->getOperand(0).getNode();
9432 if (BVN->getValueType(0) != MVT::v4i32 ||
9433 BVN->getOpcode() != ISD::BUILD_VECTOR)
9434 return false;
9435 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9436 unsigned HiElt = 1 - LoElt;
9437 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9438 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9439 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9440 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9441 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9442 return false;
9443 if (isSigned) {
9444 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9445 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9446 return true;
9447 } else {
9448 if (Hi0->isZero() && Hi1->isZero())
9449 return true;
9450 }
9451 return false;
9452 }
9453
9454 if (N->getOpcode() != ISD::BUILD_VECTOR)
9455 return false;
9456
9457 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9458 SDNode *Elt = N->getOperand(i).getNode();
9459 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9460 unsigned EltSize = VT.getScalarSizeInBits();
9461 unsigned HalfSize = EltSize / 2;
9462 if (isSigned) {
9463 if (!isIntN(HalfSize, C->getSExtValue()))
9464 return false;
9465 } else {
9466 if (!isUIntN(HalfSize, C->getZExtValue()))
9467 return false;
9468 }
9469 continue;
9470 }
9471 return false;
9472 }
9473
9474 return true;
9475}
9476
9477/// isSignExtended - Check if a node is a vector value that is sign-extended
9478/// or a constant BUILD_VECTOR with sign-extended elements.
9479static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9480 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9481 return true;
9482 if (isExtendedBUILD_VECTOR(N, DAG, true))
9483 return true;
9484 return false;
9485}
9486
9487/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9488/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9489static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9490 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9491 ISD::isZEXTLoad(N))
9492 return true;
9493 if (isExtendedBUILD_VECTOR(N, DAG, false))
9494 return true;
9495 return false;
9496}
9497
9498static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9499 if (OrigVT.getSizeInBits() >= 64)
9500 return OrigVT;
9501
9502 assert(OrigVT.isSimple() && "Expecting a simple value type");
9503
9504 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9505 switch (OrigSimpleTy) {
9506 default: llvm_unreachable("Unexpected Vector Type");
9507 case MVT::v2i8:
9508 case MVT::v2i16:
9509 return MVT::v2i32;
9510 case MVT::v4i8:
9511 return MVT::v4i16;
9512 }
9513}
9514
9515/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9516/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9517/// We insert the required extension here to get the vector to fill a D register.
9518static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9519 const EVT &OrigTy,
9520 const EVT &ExtTy,
9521 unsigned ExtOpcode) {
9522 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9523 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9524 // 64-bits we need to insert a new extension so that it will be 64-bits.
9525 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9526 if (OrigTy.getSizeInBits() >= 64)
9527 return N;
9528
9529 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9530 EVT NewVT = getExtensionTo64Bits(OrigTy);
9531
9532 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9533}
9534
9535/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9536/// does not do any sign/zero extension. If the original vector is less
9537/// than 64 bits, an appropriate extension will be added after the load to
9538/// reach a total size of 64 bits. We have to add the extension separately
9539/// because ARM does not have a sign/zero extending load for vectors.
9540static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9541 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9542
9543 // The load already has the right type.
9544 if (ExtendedTy == LD->getMemoryVT())
9545 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9546 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9547 LD->getMemOperand()->getFlags());
9548
9549 // We need to create a zextload/sextload. We cannot just create a load
9550 // followed by a zext/sext node because LowerMUL is also run during normal
9551 // operation legalization where we can't create illegal types.
9552 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9553 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9554 LD->getMemoryVT(), LD->getAlign(),
9555 LD->getMemOperand()->getFlags());
9556}
9557
9558/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9559/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9560/// the unextended value. The unextended vector should be 64 bits so that it can
9561/// be used as an operand to a VMULL instruction. If the original vector size
9562 /// before extension is less than 64 bits we add an extension to resize
9563/// the vector to 64 bits.
9564static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9565 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9566 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9567 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9568 N->getOperand(0)->getValueType(0),
9569 N->getValueType(0),
9570 N->getOpcode());
9571
9572 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9573 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9574 "Expected extending load");
9575
9576 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9577 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9578 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9579 SDValue extLoad =
9580 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9581 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9582
9583 return newLoad;
9584 }
9585
9586 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9587 // have been legalized as a BITCAST from v4i32.
9588 if (N->getOpcode() == ISD::BITCAST) {
9589 SDNode *BVN = N->getOperand(0).getNode();
9590 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9591 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9592 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9593 return DAG.getBuildVector(
9594 MVT::v2i32, SDLoc(N),
9595 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9596 }
9597 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9598 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9599 EVT VT = N->getValueType(0);
9600 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9601 unsigned NumElts = VT.getVectorNumElements();
9602 MVT TruncVT = MVT::getIntegerVT(EltSize);
9603 SmallVector<SDValue, 8> Ops;
9604 SDLoc dl(N);
9605 for (unsigned i = 0; i != NumElts; ++i) {
9606 const APInt &CInt = N->getConstantOperandAPInt(i);
9607 // Element types smaller than 32 bits are not legal, so use i32 elements.
9608 // The values are implicitly truncated so sext vs. zext doesn't matter.
9609 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9610 }
9611 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9612}
9613
9614static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9615 unsigned Opcode = N->getOpcode();
9616 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9617 SDNode *N0 = N->getOperand(0).getNode();
9618 SDNode *N1 = N->getOperand(1).getNode();
9619 return N0->hasOneUse() && N1->hasOneUse() &&
9620 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9621 }
9622 return false;
9623}
9624
9625static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9626 unsigned Opcode = N->getOpcode();
9627 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9628 SDNode *N0 = N->getOperand(0).getNode();
9629 SDNode *N1 = N->getOperand(1).getNode();
9630 return N0->hasOneUse() && N1->hasOneUse() &&
9631 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9632 }
9633 return false;
9634}
9635
9636static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9637 // Multiplications are only custom-lowered for 128-bit vectors so that
9638 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9639 EVT VT = Op.getValueType();
9640 assert(VT.is128BitVector() && VT.isInteger() &&
9641 "unexpected type for custom-lowering ISD::MUL");
9642 SDNode *N0 = Op.getOperand(0).getNode();
9643 SDNode *N1 = Op.getOperand(1).getNode();
9644 unsigned NewOpc = 0;
9645 bool isMLA = false;
9646 bool isN0SExt = isSignExtended(N0, DAG);
9647 bool isN1SExt = isSignExtended(N1, DAG);
9648 if (isN0SExt && isN1SExt)
9649 NewOpc = ARMISD::VMULLs;
9650 else {
9651 bool isN0ZExt = isZeroExtended(N0, DAG);
9652 bool isN1ZExt = isZeroExtended(N1, DAG);
9653 if (isN0ZExt && isN1ZExt)
9654 NewOpc = ARMISD::VMULLu;
9655 else if (isN1SExt || isN1ZExt) {
9656 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9657 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9658 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9659 NewOpc = ARMISD::VMULLs;
9660 isMLA = true;
9661 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9662 NewOpc = ARMISD::VMULLu;
9663 isMLA = true;
9664 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9665 std::swap(N0, N1);
9666 NewOpc = ARMISD::VMULLu;
9667 isMLA = true;
9668 }
9669 }
9670
9671 if (!NewOpc) {
9672 if (VT == MVT::v2i64)
9673 // Fall through to expand this. It is not legal.
9674 return SDValue();
9675 else
9676 // Other vector multiplications are legal.
9677 return Op;
9678 }
9679 }
9680
9681 // Legalize to a VMULL instruction.
9682 SDLoc DL(Op);
9683 SDValue Op0;
9684 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9685 if (!isMLA) {
9686 Op0 = SkipExtensionForVMULL(N0, DAG);
9687 assert(Op0.getValueType().is64BitVector() &&
9688 Op1.getValueType().is64BitVector() &&
9689 "unexpected types for extended operands to VMULL");
9690 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9691 }
9692
9693 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9694 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9695 // vmull q0, d4, d6
9696 // vmlal q0, d5, d6
9697 // is faster than
9698 // vaddl q0, d4, d5
9699 // vmovl q1, d6
9700 // vmul q0, q0, q1
9701 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9702 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9703 EVT Op1VT = Op1.getValueType();
9704 return DAG.getNode(N0->getOpcode(), DL, VT,
9705 DAG.getNode(NewOpc, DL, VT,
9706 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9707 DAG.getNode(NewOpc, DL, VT,
9708 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9709}
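// As a concrete example of the isMLA path: for a v8i16 multiply where
// N0 = add(zext v8i8 A, zext v8i8 B) and N1 = zext v8i8 C, the code above
// produces
//   add(VMULLu(A, C), VMULLu(B, C))
// which matches the back-to-back vmull/vmlal pattern shown in the comment.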
9710
9711static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9712 SelectionDAG &DAG) {
9713 // TODO: Should this propagate fast-math-flags?
9714
9715 // Convert to float
9716 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9717 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9718 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9719 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9720 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9721 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9722 // Get reciprocal estimate.
9723 // float4 recip = vrecpeq_f32(yf);
9724 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9725 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9726 Y);
9727 // Because char has a smaller range than uchar, we can actually get away
9728 // without any newton steps. This requires that we use a weird bias
9729 // of 0xb000, however (again, this has been exhaustively tested).
9730 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9731 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9732 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9733 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9734 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9735 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9736 // Convert back to short.
9737 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9738 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9739 return X;
9740}
9741
9742static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9743 SelectionDAG &DAG) {
9744 // TODO: Should this propagate fast-math-flags?
9745
9746 SDValue N2;
9747 // Convert to float.
9748 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9749 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9750 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9751 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9752 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9753 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9754
9755 // Use reciprocal estimate and one refinement step.
9756 // float4 recip = vrecpeq_f32(yf);
9757 // recip *= vrecpsq_f32(yf, recip);
9758 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9759 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9760 N1);
9761 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9762 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9763 N1, N2);
9764 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9765 // Because short has a smaller range than ushort, we can actually get away
9766 // with only a single newton step. This requires that we use a weird bias
9767 // of 0x89, however (again, this has been exhaustively tested).
9768 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9769 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9770 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9771 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9772 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9773 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9774 // Convert back to integer and return.
9775 // return vmovn_s32(vcvt_s32_f32(result));
9776 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9777 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9778 return N0;
9779}
9780
9781static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9782 const ARMSubtarget *ST) {
9783 EVT VT = Op.getValueType();
9784 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9785 "unexpected type for custom-lowering ISD::SDIV");
9786
9787 SDLoc dl(Op);
9788 SDValue N0 = Op.getOperand(0);
9789 SDValue N1 = Op.getOperand(1);
9790 SDValue N2, N3;
9791
9792 if (VT == MVT::v8i8) {
9793 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9794 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9795
9796 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9797 DAG.getIntPtrConstant(4, dl));
9798 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9799 DAG.getIntPtrConstant(4, dl));
9800 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9801 DAG.getIntPtrConstant(0, dl));
9802 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9803 DAG.getIntPtrConstant(0, dl));
9804
9805 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9806 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9807
9808 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9809 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9810
9811 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9812 return N0;
9813 }
9814 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9815}
9816
9817static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9818 const ARMSubtarget *ST) {
9819 // TODO: Should this propagate fast-math-flags?
9820 EVT VT = Op.getValueType();
9821 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9822 "unexpected type for custom-lowering ISD::UDIV");
9823
9824 SDLoc dl(Op);
9825 SDValue N0 = Op.getOperand(0);
9826 SDValue N1 = Op.getOperand(1);
9827 SDValue N2, N3;
9828
9829 if (VT == MVT::v8i8) {
9830 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9831 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9832
9833 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9834 DAG.getIntPtrConstant(4, dl));
9835 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9836 DAG.getIntPtrConstant(4, dl));
9837 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9838 DAG.getIntPtrConstant(0, dl));
9839 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9840 DAG.getIntPtrConstant(0, dl));
9841
9842 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9843 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9844
9845 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9846 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9847
9848 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9849 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9850 MVT::i32),
9851 N0);
9852 return N0;
9853 }
9854
9855 // v4i16 sdiv ... Convert to float.
9856 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9857 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9858 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9859 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9860 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9861 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9862
9863 // Use reciprocal estimate and two refinement steps.
9864 // float4 recip = vrecpeq_f32(yf);
9865 // recip *= vrecpsq_f32(yf, recip);
9866 // recip *= vrecpsq_f32(yf, recip);
9867 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9868 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9869 BN1);
9870 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9871 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9872 BN1, N2);
9873 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9874 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9875 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9876 BN1, N2);
9877 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9878 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9879 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9880 // and that it will never cause us to return an answer too large).
9881 // float4 result = as_float4(as_int4(xf*recip) + 2);
9882 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9883 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9884 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9885 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9886 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9887 // Convert back to integer and return.
9888 // return vmovn_u32(vcvt_s32_f32(result));
9889 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9890 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9891 return N0;
9892}
9893
9894static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9895 SDNode *N = Op.getNode();
9896 EVT VT = N->getValueType(0);
9897 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9898
9899 SDValue Carry = Op.getOperand(2);
9900
9901 SDLoc DL(Op);
9902
9903 SDValue Result;
9904 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9905 // This converts the boolean value carry into the carry flag.
9906 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9907
9908 // Do the addition proper using the carry flag we wanted.
9909 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9910 Op.getOperand(1), Carry);
9911
9912 // Now convert the carry flag into a boolean value.
9913 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9914 } else {
9915 // ARMISD::SUBE expects a carry, not a borrow like ISD::USUBO_CARRY, so we
9916 // have to invert the incoming borrow into a carry first.
9917 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9918 DAG.getConstant(1, DL, MVT::i32), Carry);
9919 // This converts the boolean value carry into the carry flag.
9920 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9921
9922 // Do the subtraction proper using the carry flag we wanted.
9923 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9924 Op.getOperand(1), Carry);
9925
9926 // Now convert the carry flag into a boolean value.
9927 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9928 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9929 // by ISD::USUBO_CARRY, so compute 1 - C.
9930 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9931 DAG.getConstant(1, DL, MVT::i32), Carry);
9932 }
9933
9934 // Return both values.
9935 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9936}
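The two 1 - C computations above exist because ARM's ADC/SBC work with a carry flag (1 means no borrow on subtraction), while ISD::USUBO_CARRY traffics in a borrow bit. A small scalar model of that convention, as a sketch rather than anything taken from this lowering:

#include <cstdint>

// Model of USUBO_CARRY via an ARM-style subtract-with-carry: invert the
// incoming borrow into a carry, let SBC compute a - b - (1 - carry), then
// invert the resulting carry back into a borrow.
static void usubo_carry_model(uint32_t a, uint32_t b, uint32_t borrow_in,
                              uint32_t &result, uint32_t &borrow_out) {
  uint32_t carry_in = 1u - borrow_in;                 // boolean borrow -> ARM carry
  uint64_t wide = (uint64_t)a - b - (1u - carry_in);  // what ARMISD::SUBE computes
  result = (uint32_t)wide;
  uint32_t carry_out = (wide >> 32) ? 0u : 1u;        // carry set iff no wrap-around
  borrow_out = 1u - carry_out;                        // ARM carry -> boolean borrow
}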
9937
9938SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9939 assert(Subtarget->isTargetDarwin());
9940
9941 // For iOS, we want to call an alternative entry point: __sincos_stret,
9942 // whose return values are passed via sret.
9943 SDLoc dl(Op);
9944 SDValue Arg = Op.getOperand(0);
9945 EVT ArgVT = Arg.getValueType();
9946 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9947 auto PtrVT = getPointerTy(DAG.getDataLayout());
9948
9950 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9951
9952 // Pair of floats / doubles used to pass the result.
9953 Type *RetTy = StructType::get(ArgTy, ArgTy);
9954 auto &DL = DAG.getDataLayout();
9955
9957 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9958 SDValue SRet;
9959 if (ShouldUseSRet) {
9960 // Create stack object for sret.
9961 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9962 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9963 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9964 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9965
9966 ArgListEntry Entry;
9967 Entry.Node = SRet;
9968 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9969 Entry.IsSExt = false;
9970 Entry.IsZExt = false;
9971 Entry.IsSRet = true;
9972 Args.push_back(Entry);
9974 }
9975
9976 ArgListEntry Entry;
9977 Entry.Node = Arg;
9978 Entry.Ty = ArgTy;
9979 Entry.IsSExt = false;
9980 Entry.IsZExt = false;
9981 Args.push_back(Entry);
9982
9983 RTLIB::Libcall LC =
9984 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9985 const char *LibcallName = getLibcallName(LC);
9987 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9988
9990 CLI.setDebugLoc(dl)
9991 .setChain(DAG.getEntryNode())
9992 .setCallee(CC, RetTy, Callee, std::move(Args))
9993 .setDiscardResult(ShouldUseSRet);
9994 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9995
9996 if (!ShouldUseSRet)
9997 return CallResult.first;
9998
9999 SDValue LoadSin =
10000 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
10001
10002 // Address of cos field.
10003 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
10004 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
10005 SDValue LoadCos =
10006 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
10007
10008 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
10009 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
10010 LoadSin.getValue(0), LoadCos.getValue(0));
10011}
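In C terms, the sret path above behaves like calling a helper that writes the {sin, cos} pair through a caller-provided stack slot and then loading the two fields back. A self-contained model (the real __sincos_stret symbol is provided by the Darwin runtime; the stand-in below is hypothetical):

#include <cmath>

// Hypothetical stand-in for __sincos_stret under the sret convention.
struct SinCosF64 { double Sin; double Cos; };
static void sincos_stret_model(SinCosF64 *SRet, double X) {
  SRet->Sin = std::sin(X);
  SRet->Cos = std::cos(X);
}

static void use_sincos(double X, double &S, double &C) {
  SinCosF64 Tmp;                 // the CreateStackObject slot
  sincos_stret_model(&Tmp, X);   // address passed as the sret argument
  S = Tmp.Sin;                   // LoadSin from offset 0
  C = Tmp.Cos;                   // LoadCos from offset ArgVT.getStoreSize()
}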
10012
10013SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
10014 bool Signed,
10015 SDValue &Chain) const {
10016 EVT VT = Op.getValueType();
10017 assert((VT == MVT::i32 || VT == MVT::i64) &&
10018 "unexpected type for custom lowering DIV");
10019 SDLoc dl(Op);
10020
10021 const auto &DL = DAG.getDataLayout();
10022 const auto &TLI = DAG.getTargetLoweringInfo();
10023
10024 const char *Name = nullptr;
10025 if (Signed)
10026 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
10027 else
10028 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
10029
10031
10033
10034 for (auto AI : {1, 0}) {
10035 ArgListEntry Arg;
10036 Arg.Node = Op.getOperand(AI);
10037 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10038 Args.push_back(Arg);
10039 }
10040
10041 CallLoweringInfo CLI(DAG);
10042 CLI.setDebugLoc(dl)
10043 .setChain(Chain)
10045 ES, std::move(Args));
10046
10047 return LowerCallTo(CLI).first;
10048}
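Note the {1, 0} operand loop: the divisor (operand 1) is pushed before the dividend (operand 0), so the runtime helper sees the divisor as its first argument. A tiny sketch of that call shape, using a hypothetical stand-in for the __rt_* helper:

// Hypothetical stand-in used only to show the argument order implied above.
static int rt_div_model(int Divisor, int Dividend) { return Dividend / Divisor; }

static int lower_div_call_model(int Dividend, int Divisor) {
  // Arguments pushed in {1, 0} order: Op.getOperand(1), then Op.getOperand(0).
  return rt_div_model(Divisor, Dividend);
}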
10049
10050// This is a code size optimisation: return the original SDIV node to
10051// DAGCombiner when we don't want to expand SDIV into a sequence of
10052// instructions, and an empty SDValue otherwise, which will cause the
10053// SDIV to be expanded in DAGCombine.
10054SDValue
10055ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10056 SelectionDAG &DAG,
10057 SmallVectorImpl<SDNode *> &Created) const {
10058 // TODO: Support SREM
10059 if (N->getOpcode() != ISD::SDIV)
10060 return SDValue();
10061
10062 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10063 const bool MinSize = ST.hasMinSize();
10064 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10065 : ST.hasDivideInARMMode();
10066
10067 // Don't touch vector types; rewriting this may lead to scalarizing
10068 // the int divs.
10069 if (N->getOperand(0).getValueType().isVector())
10070 return SDValue();
10071
10072 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
10073 // hwdiv support for this to be really profitable.
10074 if (!(MinSize && HasDivide))
10075 return SDValue();
10076
10077 // ARM mode is a bit simpler than Thumb: we can handle large power
10078 // of 2 immediates with 1 mov instruction; no further checks required,
10079 // just return the sdiv node.
10080 if (!ST.isThumb())
10081 return SDValue(N, 0);
10082
10083 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10084 // and thus lose the code size benefit of a 2-byte MOVS.
10085 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10086 // but as this check does exactly that, it's not worth the trouble to get TTI.
10087 if (Divisor.sgt(128))
10088 return SDValue();
10089
10090 return SDValue(N, 0);
10091}
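Put differently, when optimising for minimum size on a target with hardware divide, a small power-of-two signed division is expected to stay as a single divide instruction instead of the usual shift-and-fixup expansion. A tiny C example of the case this hook targets (assuming -Oz and Thumb hardware divide):

// Expected to remain a single sdiv under minsize with hardware divide; a
// divisor above 128 would need a wide immediate mov in Thumb, so those cases
// fall back to the generic expansion instead.
int div_by_8(int x) { return x / 8; }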
10092
10093SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10094 bool Signed) const {
10095 assert(Op.getValueType() == MVT::i32 &&
10096 "unexpected type for custom lowering DIV");
10097 SDLoc dl(Op);
10098
10099 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10100 DAG.getEntryNode(), Op.getOperand(1));
10101
10102 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10103}
10104
10106 SDLoc DL(N);
10107 SDValue Op = N->getOperand(1);
10108 if (N->getValueType(0) == MVT::i32)
10109 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10110 SDValue Lo, Hi;
10111 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10112 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10113 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10114}
10115
10116void ARMTargetLowering::ExpandDIV_Windows(
10117 SDValue Op, SelectionDAG &DAG, bool Signed,
10119 const auto &DL = DAG.getDataLayout();
10120 const auto &TLI = DAG.getTargetLoweringInfo();
10121
10122 assert(Op.getValueType() == MVT::i64 &&
10123 "unexpected type for custom lowering DIV");
10124 SDLoc dl(Op);
10125
10126 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10127
10128 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10129
10130 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10131 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10132 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10133 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10134
10135 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10136}
10137
10138static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10139 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10140 EVT MemVT = LD->getMemoryVT();
10141 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10142 MemVT == MVT::v16i1) &&
10143 "Expected a predicate type!");
10144 assert(MemVT == Op.getValueType());
10145 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10146 "Expected a non-extending load");
10147 assert(LD->isUnindexed() && "Expected an unindexed load");
10148
10149 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16-bit
10150 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10151 // need to make sure that the 8/4/2 bits are actually loaded into the correct
10152 // place, which means loading the value and then shuffling it into the
10153 // bottom bits of the predicate.
10154 // Equally, VLDR for a v16i1 will actually load 32 bits (so will be incorrect
10155 // for BE).
10156 // On big-endian targets, the rest of LLVM assumes the reverse bit order to a
10157 // natural VMSR(load), so the loaded bits need to be reversed.
10158
10159 SDLoc dl(Op);
10160 SDValue Load = DAG.getExtLoad(
10161 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10163 LD->getMemOperand());
10164 SDValue Val = Load;
10165 if (DAG.getDataLayout().isBigEndian())
10166 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10167 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10168 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10169 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10170 if (MemVT != MVT::v16i1)
10171 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10172 DAG.getConstant(0, dl, MVT::i32));
10173 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10174}
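The big-endian branch leans on a simple bit trick: bit-reversing a 32-bit word and shifting right by 32 - n yields the low n bits in reversed order. A scalar model of just that step (a sketch, not code from this file):

#include <cstdint>

// Equivalent of the ISD::BITREVERSE + ISD::SRL pair used above: return the low
// NumBits of Val with their bit order reversed.
static uint32_t reverse_low_bits(uint32_t Val, unsigned NumBits) {
  uint32_t Rev = 0;
  for (unsigned I = 0; I < 32; ++I)
    if (Val & (1u << I))
      Rev |= 1u << (31 - I);        // ISD::BITREVERSE
  return Rev >> (32 - NumBits);     // ISD::SRL by (32 - MemVT size in bits)
}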
10175
10176void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10177 SelectionDAG &DAG) const {
10178 LoadSDNode *LD = cast<LoadSDNode>(N);
10179 EVT MemVT = LD->getMemoryVT();
10180 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10181
10182 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10183 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10184 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10185 SDLoc dl(N);
10187 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10188 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10189 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10190 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10191 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10192 Results.append({Pair, Result.getValue(2)});
10193 }
10194}
10195
10196static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10197 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10198 EVT MemVT = ST->getMemoryVT();
10199 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10200 MemVT == MVT::v16i1) &&
10201 "Expected a predicate type!");
10202 assert(MemVT == ST->getValue().getValueType());
10203 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10204 assert(ST->isUnindexed() && "Expected an unindexed store");
10205
10206 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10207 // top bits unset and a scalar store.
10208 SDLoc dl(Op);
10209 SDValue Build = ST->getValue();
10210 if (MemVT != MVT::v16i1) {
10212 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10213 unsigned Elt = DAG.getDataLayout().isBigEndian()
10214 ? MemVT.getVectorNumElements() - I - 1
10215 : I;
10216 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10217 DAG.getConstant(Elt, dl, MVT::i32)));
10218 }
10219 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10220 Ops.push_back(DAG.getUNDEF(MVT::i32));
10221 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10222 }
10223 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10224 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10225 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10226 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10227 DAG.getConstant(16, dl, MVT::i32));
10228 return DAG.getTruncStore(
10229 ST->getChain(), dl, GRP, ST->getBasePtr(),
10231 ST->getMemOperand());
10232}
10233
10234static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10235 const ARMSubtarget *Subtarget) {
10236 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10237 EVT MemVT = ST->getMemoryVT();
10238 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10239
10240 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10241 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10242 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10243 SDNode *N = Op.getNode();
10244 SDLoc dl(N);
10245
10246 SDValue Lo = DAG.getNode(
10247 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10248 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10249 MVT::i32));
10250 SDValue Hi = DAG.getNode(
10251 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10252 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10253 MVT::i32));
10254
10255 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10256 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10257 MemVT, ST->getMemOperand());
10258 } else if (Subtarget->hasMVEIntegerOps() &&
10259 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10260 MemVT == MVT::v16i1))) {
10261 return LowerPredicateStore(Op, DAG);
10262 }
10263
10264 return SDValue();
10265}
10266
10267static bool isZeroVector(SDValue N) {
10268 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10269 (N->getOpcode() == ARMISD::VMOVIMM &&
10270 isNullConstant(N->getOperand(0))));
10271}
10272
10273static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10274 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10275 MVT VT = Op.getSimpleValueType();
10276 SDValue Mask = N->getMask();
10277 SDValue PassThru = N->getPassThru();
10278 SDLoc dl(Op);
10279
10280 if (isZeroVector(PassThru))
10281 return Op;
10282
10283 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10284 // zero too, and other values are lowered to a select.
10285 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10286 DAG.getTargetConstant(0, dl, MVT::i32));
10287 SDValue NewLoad = DAG.getMaskedLoad(
10288 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10289 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10290 N->getExtensionType(), N->isExpandingLoad());
10291 SDValue Combo = NewLoad;
10292 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10293 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10294 isZeroVector(PassThru->getOperand(0));
10295 if (!PassThru.isUndef() && !PassThruIsCastZero)
10296 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10297 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10298}
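Per lane, the transformation above amounts to the following: the hardware load always produces zero in inactive lanes, so a non-zero passthru is merged back afterwards with a select (scalar model, illustrative only):

// One lane of an MVE masked load with a user-supplied passthru value.
static int masked_load_lane_model(bool MaskBit, const int *Addr, int PassThru) {
  int HwLoaded = MaskBit ? *Addr : 0;   // what the zero-passthru masked load yields
  return MaskBit ? HwLoaded : PassThru; // the extra ISD::VSELECT inserted above
}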
10299
10300static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10301 const ARMSubtarget *ST) {
10302 if (!ST->hasMVEIntegerOps())
10303 return SDValue();
10304
10305 SDLoc dl(Op);
10306 unsigned BaseOpcode = 0;
10307 switch (Op->getOpcode()) {
10308 default: llvm_unreachable("Expected VECREDUCE opcode");
10309 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10310 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10311 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10312 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10313 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10314 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10315 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10316 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10317 }
10318
10319 SDValue Op0 = Op->getOperand(0);
10320 EVT VT = Op0.getValueType();
10321 EVT EltVT = VT.getVectorElementType();
10322 unsigned NumElts = VT.getVectorNumElements();
10323 unsigned NumActiveLanes = NumElts;
10324
10325 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10326 NumActiveLanes == 2) &&
10327 "Only expected a power-of-2 vector size");
10328
10329 // Use BaseOpcode(X, Rev(X)) until 4 lanes remain. Going down to 4 vector
10330 // elements allows us to easily extract the results from the lanes.
10331 while (NumActiveLanes > 4) {
10332 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10333 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10334 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10335 NumActiveLanes /= 2;
10336 }
10337
10338 SDValue Res;
10339 if (NumActiveLanes == 4) {
10340 // The remaining 4 elements are combined sequentially
10341 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10342 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10343 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10344 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10345 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10346 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10347 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10348 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10349 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10350 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10351 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10352 } else {
10353 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10354 DAG.getConstant(0, dl, MVT::i32));
10355 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10356 DAG.getConstant(1, dl, MVT::i32));
10357 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10358 }
10359
10360 // Result type may be wider than element type.
10361 if (EltVT != Op->getValueType(0))
10362 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10363 return Res;
10364}
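The shape of the algorithm: fold the vector in half by combining it with a lane-reversed copy of itself until four lanes remain, then finish with scalar operations on the extracted lanes. A scalar model for an 8-lane multiply reduction (a sketch of the strategy, with int lanes standing in for the vector elements):

#include <array>
#include <utility>

// Model of the VECREDUCE_MUL lowering for 8 lanes: one VREV32-style pairwise
// swap-and-combine leaves 4 lanes of interest at indices 0, 2, 4, 6, which are
// then combined sequentially as scalars.
static int reduce_mul_8_model(std::array<int, 8> V) {
  std::array<int, 8> Rev = V;
  for (unsigned I = 0; I < 8; I += 2)
    std::swap(Rev[I], Rev[I + 1]);      // reverse elements within each pair
  for (unsigned I = 0; I < 8; ++I)
    V[I] *= Rev[I];                     // BaseOpcode(X, Rev(X))
  int R0 = V[0] * V[2];                 // Ext0 op Ext1
  int R1 = V[4] * V[6];                 // Ext2 op Ext3
  return R0 * R1;
}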
10365
10367static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10368 const ARMSubtarget *ST) {
10368 if (!ST->hasMVEFloatOps())
10369 return SDValue();
10370 return LowerVecReduce(Op, DAG, ST);
10371}
10372
10373static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10374 const ARMSubtarget *ST) {
10375 if (!ST->hasNEON())
10376 return SDValue();
10377
10378 SDLoc dl(Op);
10379 SDValue Op0 = Op->getOperand(0);
10380 EVT VT = Op0.getValueType();
10381 EVT EltVT = VT.getVectorElementType();
10382
10383 unsigned PairwiseIntrinsic = 0;
10384 switch (Op->getOpcode()) {
10385 default:
10386 llvm_unreachable("Expected VECREDUCE opcode");
10388 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10389 break;
10391 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10392 break;
10394 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10395 break;
10397 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10398 break;
10399 }
10400 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10401
10402 unsigned NumElts = VT.getVectorNumElements();
10403 unsigned NumActiveLanes = NumElts;
10404
10405 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10406 NumActiveLanes == 2) &&
10407 "Only expected a power-of-2 vector size");
10408
10409 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10410 if (VT.is128BitVector()) {
10411 SDValue Lo, Hi;
10412 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10413 VT = Lo.getValueType();
10414 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10415 NumActiveLanes /= 2;
10416 }
10417
10418 // Use pairwise reductions until one lane remains
10419 while (NumActiveLanes > 1) {
10420 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10421 NumActiveLanes /= 2;
10422 }
10423
10424 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10425 DAG.getConstant(0, dl, MVT::i32));
10426
10427 // Result type may be wider than element type.
10428 if (EltVT != Op.getValueType()) {
10429 unsigned Extend = 0;
10430 switch (Op->getOpcode()) {
10431 default:
10432 llvm_unreachable("Expected VECREDUCE opcode");
10435 Extend = ISD::ZERO_EXTEND;
10436 break;
10439 Extend = ISD::SIGN_EXTEND;
10440 break;
10441 }
10442 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10443 }
10444 return Res;
10445}
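With NEON this becomes a chain of pairwise min/max instructions fed with the vector and itself. For a 16 x u8 unsigned-minimum reduction the resulting pattern looks roughly like this with arm_neon.h intrinsics (an illustration, not code from this file):

#include <arm_neon.h>

// VECREDUCE_UMIN on uint8x16_t: split the 128-bit vector once (vpmin takes two
// 64-bit inputs), then keep folding pairwise until lane 0 holds the result.
static uint8_t reduce_umin_u8x16_sketch(uint8x16_t V) {
  uint8x8_t M = vpmin_u8(vget_low_u8(V), vget_high_u8(V)); // 16 -> 8 active lanes
  M = vpmin_u8(M, M);                                      // 8 -> 4
  M = vpmin_u8(M, M);                                      // 4 -> 2
  M = vpmin_u8(M, M);                                      // 2 -> 1
  return vget_lane_u8(M, 0);
}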
10446
10447static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10448 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10449 // Acquire/Release load/store is not legal for targets without a dmb or
10450 // equivalent available.
10451 return SDValue();
10452
10453 // Monotonic load/store is legal for all targets.
10454 return Op;
10455}
10456
10457static void ReplaceREADCYCLECOUNTER(SDNode *N,
10458 SmallVectorImpl<SDValue> &Results,
10459 SelectionDAG &DAG,
10460 const ARMSubtarget *Subtarget) {
10461 SDLoc DL(N);
10462 // Under the Performance Monitors extension, the cycle count is read as:
10463 // mrc p15, #0, <Rt>, c9, c13, #0
10464 SDValue Ops[] = { N->getOperand(0), // Chain
10465 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10466 DAG.getTargetConstant(15, DL, MVT::i32),
10467 DAG.getTargetConstant(0, DL, MVT::i32),
10468 DAG.getTargetConstant(9, DL, MVT::i32),
10469 DAG.getTargetConstant(13, DL, MVT::i32),
10470 DAG.getTargetConstant(0, DL, MVT::i32)
10471 };
10472
10473 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10474 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10475 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10476 DAG.getConstant(0, DL, MVT::i32)));
10477 Results.push_back(Cycles32.getValue(1));
10478}
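The MRC encoding assembled above is the usual way of reading the PMU cycle counter from C as well. A sketch with GCC/Clang-style inline assembly (assumes an AArch32 target and that the counter is enabled and accessible at the current privilege level):

#include <cstdint>

// Read the 32-bit cycle counter that READCYCLECOUNTER is lowered to, and
// zero-extend it to 64 bits just as the BUILD_PAIR above does.
static uint64_t read_cycle_counter_sketch() {
  uint32_t Cycles32;
  __asm__ volatile("mrc p15, #0, %0, c9, c13, #0" : "=r"(Cycles32));
  return (uint64_t)Cycles32;   // high word is constant 0
}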
10479
10480static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10481 SDLoc dl(V.getNode());
10482 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10483 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10484 if (isBigEndian)
10485 std::swap (VLo, VHi);
10486 SDValue RegClass =
10487 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10488 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10489 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10490 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10491 return SDValue(
10492 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10493}
10494
10497 SelectionDAG &DAG) {
10498 assert(N->getValueType(0) == MVT::i64 &&
10499 "AtomicCmpSwap on types less than 64 should be legal");
10500 SDValue Ops[] = {N->getOperand(1),
10501 createGPRPairNode(DAG, N->getOperand(2)),
10502 createGPRPairNode(DAG, N->getOperand(3)),
10503 N->getOperand(0)};
10504 SDNode *CmpSwap = DAG.getMachineNode(
10505 ARM::CMP_SWAP_64, SDLoc(N),
10506 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10507
10508 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10509 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10510
10511 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10512
10513 SDValue Lo =
10514 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10515 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10516 SDValue Hi =
10517 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10518 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10519 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10520 Results.push_back(SDValue(CmpSwap, 2));
10521}
10522
10523SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10524 SDLoc dl(Op);
10525 EVT VT = Op.getValueType();
10526 SDValue Chain = Op.getOperand(0);
10527 SDValue LHS = Op.getOperand(1);
10528 SDValue RHS = Op.getOperand(2);
10529 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10530 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10531
10532 // If we don't have instructions of this float type then soften to a libcall
10533 // and use SETCC instead.
10534 if (isUnsupportedFloatingType(LHS.getValueType())) {
10536 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10537 if (!RHS.getNode()) {
10538 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10539 CC = ISD::SETNE;
10540 }
10541 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10542 DAG.getCondCode(CC));
10543 return DAG.getMergeValues({Result, Chain}, dl);
10544 }
10545
10546 ARMCC::CondCodes CondCode, CondCode2;
10547 FPCCToARMCC(CC, CondCode, CondCode2);
10548
10549 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10550 // in CMPFP and CMPFPE, but instead it should be made explicit by these
10551 // instructions using a chain instead of glue. This would also fix the problem
10552 // here (and also in LowerSELECT_CC) where we generate two comparisons when
10553 // CondCode2 != AL.
10554 SDValue True = DAG.getConstant(1, dl, VT);
10555 SDValue False = DAG.getConstant(0, dl, VT);
10556 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10557 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10558 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10559 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10560 if (CondCode2 != ARMCC::AL) {
10561 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10562 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10563 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10564 }
10565 return DAG.getMergeValues({Result, Chain}, dl);
10566}
10567
10568SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10570
10571 EVT VT = getPointerTy(DAG.getDataLayout());
10572 SDLoc DL(Op);
10573 int FI = MFI.CreateFixedObject(4, 0, false);
10574 return DAG.getFrameIndex(FI, VT);
10575}
10576
10577SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10578 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10579 switch (Op.getOpcode()) {
10580 default: llvm_unreachable("Don't know how to custom lower this!");
10581 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10582 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10583 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10584 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10585 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10586 case ISD::SELECT: return LowerSELECT(Op, DAG);
10587 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10588 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10589 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10590 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10591 case ISD::VASTART: return LowerVASTART(Op, DAG);
10592 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10593 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10594 case ISD::SINT_TO_FP:
10595 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10598 case ISD::FP_TO_SINT:
10599 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10601 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10602 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10603 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10604 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10605 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10606 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10607 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10608 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10609 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10610 Subtarget);
10611 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10612 case ISD::SHL:
10613 case ISD::SRL:
10614 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10615 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10616 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10617 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10618 case ISD::SRL_PARTS:
10619 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10620 case ISD::CTTZ:
10621 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10622 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10623 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10624 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10625 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10626 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10627 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10628 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10629 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10630 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10631 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10632 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10633 case ISD::SIGN_EXTEND:
10634 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10635 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10636 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10637 case ISD::SET_FPMODE:
10638 return LowerSET_FPMODE(Op, DAG);
10639 case ISD::RESET_FPMODE:
10640 return LowerRESET_FPMODE(Op, DAG);
10641 case ISD::MUL: return LowerMUL(Op, DAG);
10642 case ISD::SDIV:
10643 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10644 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10645 return LowerSDIV(Op, DAG, Subtarget);
10646 case ISD::UDIV:
10647 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10648 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10649 return LowerUDIV(Op, DAG, Subtarget);
10650 case ISD::UADDO_CARRY:
10651 case ISD::USUBO_CARRY:
10652 return LowerUADDSUBO_CARRY(Op, DAG);
10653 case ISD::SADDO:
10654 case ISD::SSUBO:
10655 return LowerSignedALUO(Op, DAG);
10656 case ISD::UADDO:
10657 case ISD::USUBO:
10658 return LowerUnsignedALUO(Op, DAG);
10659 case ISD::SADDSAT:
10660 case ISD::SSUBSAT:
10661 case ISD::UADDSAT:
10662 case ISD::USUBSAT:
10663 return LowerADDSUBSAT(Op, DAG, Subtarget);
10664 case ISD::LOAD:
10665 return LowerPredicateLoad(Op, DAG);
10666 case ISD::STORE:
10667 return LowerSTORE(Op, DAG, Subtarget);
10668 case ISD::MLOAD:
10669 return LowerMLOAD(Op, DAG);
10670 case ISD::VECREDUCE_MUL:
10671 case ISD::VECREDUCE_AND:
10672 case ISD::VECREDUCE_OR:
10673 case ISD::VECREDUCE_XOR:
10674 return LowerVecReduce(Op, DAG, Subtarget);
10679 return LowerVecReduceF(Op, DAG, Subtarget);
10684 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10685 case ISD::ATOMIC_LOAD:
10686 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10687 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10688 case ISD::SDIVREM:
10689 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10691 if (Subtarget->isTargetWindows())
10692 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10693 llvm_unreachable("Don't know how to custom lower this!");
10695 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10697 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10698 case ISD::STRICT_FSETCC:
10699 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10700 case ISD::SPONENTRY:
10701 return LowerSPONENTRY(Op, DAG);
10702 case ARMISD::WIN__DBZCHK: return SDValue();
10703 }
10704}
10705
10706static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10707 SelectionDAG &DAG) {
10708 unsigned IntNo = N->getConstantOperandVal(0);
10709 unsigned Opc = 0;
10710 if (IntNo == Intrinsic::arm_smlald)
10711 Opc = ARMISD::SMLALD;
10712 else if (IntNo == Intrinsic::arm_smlaldx)
10713 Opc = ARMISD::SMLALDX;
10714 else if (IntNo == Intrinsic::arm_smlsld)
10715 Opc = ARMISD::SMLSLD;
10716 else if (IntNo == Intrinsic::arm_smlsldx)
10717 Opc = ARMISD::SMLSLDX;
10718 else
10719 return;
10720
10721 SDLoc dl(N);
10722 SDValue Lo, Hi;
10723 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10724
10725 SDValue LongMul = DAG.getNode(Opc, dl,
10726 DAG.getVTList(MVT::i32, MVT::i32),
10727 N->getOperand(1), N->getOperand(2),
10728 Lo, Hi);
10729 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10730 LongMul.getValue(0), LongMul.getValue(1)));
10731}
10732
10733/// ReplaceNodeResults - Replace the results of node with an illegal result
10734/// type with new values built out of custom code.
10735void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10736 SmallVectorImpl<SDValue> &Results,
10737 SelectionDAG &DAG) const {
10738 SDValue Res;
10739 switch (N->getOpcode()) {
10740 default:
10741 llvm_unreachable("Don't know how to custom expand this!");
10742 case ISD::READ_REGISTER:
10744 break;
10745 case ISD::BITCAST:
10746 Res = ExpandBITCAST(N, DAG, Subtarget);
10747 break;
10748 case ISD::SRL:
10749 case ISD::SRA:
10750 case ISD::SHL:
10751 Res = Expand64BitShift(N, DAG, Subtarget);
10752 break;
10753 case ISD::SREM:
10754 case ISD::UREM:
10755 Res = LowerREM(N, DAG);
10756 break;
10757 case ISD::SDIVREM:
10758 case ISD::UDIVREM:
10759 Res = LowerDivRem(SDValue(N, 0), DAG);
10760 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10761 Results.push_back(Res.getValue(0));
10762 Results.push_back(Res.getValue(1));
10763 return;
10764 case ISD::SADDSAT:
10765 case ISD::SSUBSAT:
10766 case ISD::UADDSAT:
10767 case ISD::USUBSAT:
10768 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10769 break;
10771 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10772 return;
10773 case ISD::UDIV:
10774 case ISD::SDIV:
10775 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10776 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10777 Results);
10780 return;
10782 return ReplaceLongIntrinsic(N, Results, DAG);
10783 case ISD::LOAD:
10784 LowerLOAD(N, Results, DAG);
10785 break;
10786 case ISD::TRUNCATE:
10787 Res = LowerTruncate(N, DAG, Subtarget);
10788 break;
10789 case ISD::SIGN_EXTEND:
10790 case ISD::ZERO_EXTEND:
10791 Res = LowerVectorExtend(N, DAG, Subtarget);
10792 break;
10795 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10796 break;
10797 }
10798 if (Res.getNode())
10799 Results.push_back(Res);
10800}
10801
10802//===----------------------------------------------------------------------===//
10803// ARM Scheduler Hooks
10804//===----------------------------------------------------------------------===//
10805
10806/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10807/// registers the function context.
10808void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10809 MachineBasicBlock *MBB,
10810 MachineBasicBlock *DispatchBB,
10811 int FI) const {
10812 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10813 "ROPI/RWPI not currently supported with SjLj");
10814 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10815 DebugLoc dl = MI.getDebugLoc();
10816 MachineFunction *MF = MBB->getParent();
10820 const Function &F = MF->getFunction();
10821
10822 bool isThumb = Subtarget->isThumb();
10823 bool isThumb2 = Subtarget->isThumb2();
10824
10825 unsigned PCLabelId = AFI->createPICLabelUId();
10826 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10828 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10829 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10830
10831 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10832 : &ARM::GPRRegClass;
10833
10834 // Grab constant pool and fixed stack memory operands.
10835 MachineMemOperand *CPMMO =
10838
10839 MachineMemOperand *FIMMOSt =
10842
10843 // Load the address of the dispatch MBB into the jump buffer.
10844 if (isThumb2) {
10845 // Incoming value: jbuf
10846 // ldr.n r5, LCPI1_1
10847 // orr r5, r5, #1
10848 // add r5, pc
10849 // str r5, [$jbuf, #+4] ; &jbuf[1]
10850 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10851 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10853 .addMemOperand(CPMMO)
10855 // Set the low bit because of thumb mode.
10856 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10857 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10858 .addReg(NewVReg1, RegState::Kill)
10859 .addImm(0x01)
10861 .add(condCodeOp());
10862 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10863 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10864 .addReg(NewVReg2, RegState::Kill)
10865 .addImm(PCLabelId);
10866 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10867 .addReg(NewVReg3, RegState::Kill)
10868 .addFrameIndex(FI)
10869 .addImm(36) // &jbuf[1] :: pc
10870 .addMemOperand(FIMMOSt)
10872 } else if (isThumb) {
10873 // Incoming value: jbuf
10874 // ldr.n r1, LCPI1_4
10875 // add r1, pc
10876 // mov r2, #1
10877 // orrs r1, r2
10878 // add r2, $jbuf, #+4 ; &jbuf[1]
10879 // str r1, [r2]
10880 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10881 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10883 .addMemOperand(CPMMO)
10885 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10886 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10887 .addReg(NewVReg1, RegState::Kill)
10888 .addImm(PCLabelId);
10889 // Set the low bit because of thumb mode.
10890 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10891 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10892 .addReg(ARM::CPSR, RegState::Define)
10893 .addImm(1)
10895 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10896 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10897 .addReg(ARM::CPSR, RegState::Define)
10898 .addReg(NewVReg2, RegState::Kill)
10899 .addReg(NewVReg3, RegState::Kill)
10901 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10902 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10903 .addFrameIndex(FI)
10904 .addImm(36); // &jbuf[1] :: pc
10905 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10906 .addReg(NewVReg4, RegState::Kill)
10907 .addReg(NewVReg5, RegState::Kill)
10908 .addImm(0)
10909 .addMemOperand(FIMMOSt)
10911 } else {
10912 // Incoming value: jbuf
10913 // ldr r1, LCPI1_1
10914 // add r1, pc, r1
10915 // str r1, [$jbuf, #+4] ; &jbuf[1]
10916 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10917 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10919 .addImm(0)
10920 .addMemOperand(CPMMO)
10922 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10923 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10924 .addReg(NewVReg1, RegState::Kill)
10925 .addImm(PCLabelId)
10927 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10928 .addReg(NewVReg2, RegState::Kill)
10929 .addFrameIndex(FI)
10930 .addImm(36) // &jbuf[1] :: pc
10931 .addMemOperand(FIMMOSt)
10933 }
10934}
10935
10936void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10937 MachineBasicBlock *MBB) const {
10938 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10939 DebugLoc dl = MI.getDebugLoc();
10940 MachineFunction *MF = MBB->getParent();
10942 MachineFrameInfo &MFI = MF->getFrameInfo();
10943 int FI = MFI.getFunctionContextIndex();
10944
10945 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10946 : &ARM::GPRnopcRegClass;
10947
10948 // Get a mapping of the call site numbers to all of the landing pads they're
10949 // associated with.
10951 unsigned MaxCSNum = 0;
10952 for (MachineBasicBlock &BB : *MF) {
10953 if (!BB.isEHPad())
10954 continue;
10955
10956 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10957 // pad.
10958 for (MachineInstr &II : BB) {
10959 if (!II.isEHLabel())
10960 continue;
10961
10962 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10963 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10964
10965 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10966 for (unsigned Idx : CallSiteIdxs) {
10967 CallSiteNumToLPad[Idx].push_back(&BB);
10968 MaxCSNum = std::max(MaxCSNum, Idx);
10969 }
10970 break;
10971 }
10972 }
10973
10974 // Get an ordered list of the machine basic blocks for the jump table.
10975 std::vector<MachineBasicBlock*> LPadList;
10977 LPadList.reserve(CallSiteNumToLPad.size());
10978 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10979 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10980 for (MachineBasicBlock *MBB : MBBList) {
10981 LPadList.push_back(MBB);
10982 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10983 }
10984 }
10985
10986 assert(!LPadList.empty() &&
10987 "No landing pad destinations for the dispatch jump table!");
10988
10989 // Create the jump table and associated information.
10991 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10992 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10993
10994 // Create the MBBs for the dispatch code.
10995
10996 // Shove the dispatch's address into the return slot in the function context.
10997 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10998 DispatchBB->setIsEHPad();
10999
11000 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11001 unsigned trap_opcode;
11002 if (Subtarget->isThumb())
11003 trap_opcode = ARM::tTRAP;
11004 else
11005 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
11006
11007 BuildMI(TrapBB, dl, TII->get(trap_opcode));
11008 DispatchBB->addSuccessor(TrapBB);
11009
11010 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11011 DispatchBB->addSuccessor(DispContBB);
11012
11013 // Insert the new MBBs into the function.
11014 MF->insert(MF->end(), DispatchBB);
11015 MF->insert(MF->end(), DispContBB);
11016 MF->insert(MF->end(), TrapBB);
11017
11018 // Insert code into the entry block that creates and registers the function
11019 // context.
11020 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
11021
11022 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11025
11027 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11028
11029 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
11030 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11031
11032 // Add a register mask with no preserved registers. This results in all
11033 // registers being marked as clobbered. This can't work if the dispatch block
11034 // is in a Thumb1 function and is linked with ARM code which uses the FP
11035 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11037
11038 bool IsPositionIndependent = isPositionIndependent();
11039 unsigned NumLPads = LPadList.size();
11040 if (Subtarget->isThumb2()) {
11041 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11042 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11043 .addFrameIndex(FI)
11044 .addImm(4)
11045 .addMemOperand(FIMMOLd)
11047
11048 if (NumLPads < 256) {
11049 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11050 .addReg(NewVReg1)
11051 .addImm(LPadList.size())
11053 } else {
11054 Register VReg1 = MRI->createVirtualRegister(TRC);
11055 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11056 .addImm(NumLPads & 0xFFFF)
11058
11059 unsigned VReg2 = VReg1;
11060 if ((NumLPads & 0xFFFF0000) != 0) {
11061 VReg2 = MRI->createVirtualRegister(TRC);
11062 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11063 .addReg(VReg1)
11064 .addImm(NumLPads >> 16)
11066 }
11067
11068 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11069 .addReg(NewVReg1)
11070 .addReg(VReg2)
11072 }
11073
11074 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11075 .addMBB(TrapBB)
11077 .addReg(ARM::CPSR);
11078
11079 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11080 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11081 .addJumpTableIndex(MJTI)
11083
11084 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11085 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11086 .addReg(NewVReg3, RegState::Kill)
11087 .addReg(NewVReg1)
11090 .add(condCodeOp());
11091
11092 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11093 .addReg(NewVReg4, RegState::Kill)
11094 .addReg(NewVReg1)
11095 .addJumpTableIndex(MJTI);
11096 } else if (Subtarget->isThumb()) {
11097 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11098 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11099 .addFrameIndex(FI)
11100 .addImm(1)
11101 .addMemOperand(FIMMOLd)
11103
11104 if (NumLPads < 256) {
11105 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11106 .addReg(NewVReg1)
11107 .addImm(NumLPads)
11109 } else {
11110 MachineConstantPool *ConstantPool = MF->getConstantPool();
11111 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11112 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11113
11114 // MachineConstantPool wants an explicit alignment.
11115 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11116 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11117
11118 Register VReg1 = MRI->createVirtualRegister(TRC);
11119 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11120 .addReg(VReg1, RegState::Define)
11123 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11124 .addReg(NewVReg1)
11125 .addReg(VReg1)
11127 }
11128
11129 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11130 .addMBB(TrapBB)
11132 .addReg(ARM::CPSR);
11133
11134 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11135 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11136 .addReg(ARM::CPSR, RegState::Define)
11137 .addReg(NewVReg1)
11138 .addImm(2)
11140
11141 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11142 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11143 .addJumpTableIndex(MJTI)
11145
11146 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11147 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11148 .addReg(ARM::CPSR, RegState::Define)
11149 .addReg(NewVReg2, RegState::Kill)
11150 .addReg(NewVReg3)
11152
11153 MachineMemOperand *JTMMOLd =
11154 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11156
11157 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11158 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11159 .addReg(NewVReg4, RegState::Kill)
11160 .addImm(0)
11161 .addMemOperand(JTMMOLd)
11163
11164 unsigned NewVReg6 = NewVReg5;
11165 if (IsPositionIndependent) {
11166 NewVReg6 = MRI->createVirtualRegister(TRC);
11167 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11168 .addReg(ARM::CPSR, RegState::Define)
11169 .addReg(NewVReg5, RegState::Kill)
11170 .addReg(NewVReg3)
11172 }
11173
11174 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11175 .addReg(NewVReg6, RegState::Kill)
11176 .addJumpTableIndex(MJTI);
11177 } else {
11178 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11179 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11180 .addFrameIndex(FI)
11181 .addImm(4)
11182 .addMemOperand(FIMMOLd)
11184
11185 if (NumLPads < 256) {
11186 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11187 .addReg(NewVReg1)
11188 .addImm(NumLPads)
11190 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11191 Register VReg1 = MRI->createVirtualRegister(TRC);
11192 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11193 .addImm(NumLPads & 0xFFFF)
11195
11196 unsigned VReg2 = VReg1;
11197 if ((NumLPads & 0xFFFF0000) != 0) {
11198 VReg2 = MRI->createVirtualRegister(TRC);
11199 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11200 .addReg(VReg1)
11201 .addImm(NumLPads >> 16)
11203 }
11204
11205 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11206 .addReg(NewVReg1)
11207 .addReg(VReg2)
11209 } else {
11210 MachineConstantPool *ConstantPool = MF->getConstantPool();
11211 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11212 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11213
11214 // MachineConstantPool wants an explicit alignment.
11215 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11216 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11217
11218 Register VReg1 = MRI->createVirtualRegister(TRC);
11219 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11220 .addReg(VReg1, RegState::Define)
11222 .addImm(0)
11224 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11225 .addReg(NewVReg1)
11226 .addReg(VReg1, RegState::Kill)
11228 }
11229
11230 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11231 .addMBB(TrapBB)
11233 .addReg(ARM::CPSR);
11234
11235 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11236 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11237 .addReg(NewVReg1)
11240 .add(condCodeOp());
11241 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11242 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11243 .addJumpTableIndex(MJTI)
11245
11246 MachineMemOperand *JTMMOLd =
11247 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11249 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11250 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11251 .addReg(NewVReg3, RegState::Kill)
11252 .addReg(NewVReg4)
11253 .addImm(0)
11254 .addMemOperand(JTMMOLd)
11256
11257 if (IsPositionIndependent) {
11258 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11259 .addReg(NewVReg5, RegState::Kill)
11260 .addReg(NewVReg4)
11261 .addJumpTableIndex(MJTI);
11262 } else {
11263 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11264 .addReg(NewVReg5, RegState::Kill)
11265 .addJumpTableIndex(MJTI);
11266 }
11267 }
11268
11269 // Add the jump table entries as successors to the MBB.
11271 for (MachineBasicBlock *CurMBB : LPadList) {
11272 if (SeenMBBs.insert(CurMBB).second)
11273 DispContBB->addSuccessor(CurMBB);
11274 }
11275
11276 // N.B. the order the invoke BBs are processed in doesn't matter here.
11277 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11279 for (MachineBasicBlock *BB : InvokeBBs) {
11280
11281 // Remove the landing pad successor from the invoke block and replace it
11282 // with the new dispatch block.
11283 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11284 while (!Successors.empty()) {
11285 MachineBasicBlock *SMBB = Successors.pop_back_val();
11286 if (SMBB->isEHPad()) {
11287 BB->removeSuccessor(SMBB);
11288 MBBLPads.push_back(SMBB);
11289 }
11290 }
11291
11292 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11293 BB->normalizeSuccProbs();
11294
11295 // Find the invoke call and mark all of the callee-saved registers as
11296 // 'implicit defined' so that they're spilled. This prevents code from
11297 // moving instructions to before the EH block, where they will never be
11298 // executed.
11300 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11301 if (!II->isCall()) continue;
11302
11305 OI = II->operands_begin(), OE = II->operands_end();
11306 OI != OE; ++OI) {
11307 if (!OI->isReg()) continue;
11308 DefRegs[OI->getReg()] = true;
11309 }
11310
11311 MachineInstrBuilder MIB(*MF, &*II);
11312
11313 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11314 unsigned Reg = SavedRegs[i];
11315 if (Subtarget->isThumb2() &&
11316 !ARM::tGPRRegClass.contains(Reg) &&
11317 !ARM::hGPRRegClass.contains(Reg))
11318 continue;
11319 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11320 continue;
11321 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11322 continue;
11323 if (!DefRegs[Reg])
11325 }
11326
11327 break;
11328 }
11329 }
11330
11331 // Mark all former landing pads as non-landing pads. The dispatch is the only
11332 // landing pad now.
11333 for (MachineBasicBlock *MBBLPad : MBBLPads)
11334 MBBLPad->setIsEHPad(false);
11335
11336 // The instruction is gone now.
11337 MI.eraseFromParent();
11338}
11339
11340static
11341MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11342 for (MachineBasicBlock *S : MBB->successors())
11343 if (S != Succ)
11344 return S;
11345 llvm_unreachable("Expecting a BB with two successors!");
11346}
11347
11348/// Return the load opcode for a given load size. If load size >= 8,
11349/// a NEON opcode will be returned.
11350static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11351 if (LdSize >= 8)
11352 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11353 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11354 if (IsThumb1)
11355 return LdSize == 4 ? ARM::tLDRi
11356 : LdSize == 2 ? ARM::tLDRHi
11357 : LdSize == 1 ? ARM::tLDRBi : 0;
11358 if (IsThumb2)
11359 return LdSize == 4 ? ARM::t2LDR_POST
11360 : LdSize == 2 ? ARM::t2LDRH_POST
11361 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11362 return LdSize == 4 ? ARM::LDR_POST_IMM
11363 : LdSize == 2 ? ARM::LDRH_POST
11364 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11365}
11366
11367/// Return the store opcode for a given store size. If store size >= 8,
11368/// a NEON opcode will be returned.
11369static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11370 if (StSize >= 8)
11371 return StSize == 16 ? ARM::VST1q32wb_fixed
11372 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11373 if (IsThumb1)
11374 return StSize == 4 ? ARM::tSTRi
11375 : StSize == 2 ? ARM::tSTRHi
11376 : StSize == 1 ? ARM::tSTRBi : 0;
11377 if (IsThumb2)
11378 return StSize == 4 ? ARM::t2STR_POST
11379 : StSize == 2 ? ARM::t2STRH_POST
11380 : StSize == 1 ? ARM::t2STRB_POST : 0;
11381 return StSize == 4 ? ARM::STR_POST_IMM
11382 : StSize == 2 ? ARM::STRH_POST
11383 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11384}
11385
11386/// Emit a post-increment load operation with given size. The instructions
11387/// will be added to BB at Pos.
11388static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11389 const TargetInstrInfo *TII, const DebugLoc &dl,
11390 unsigned LdSize, unsigned Data, unsigned AddrIn,
11391 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11392 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11393 assert(LdOpc != 0 && "Should have a load opcode");
11394 if (LdSize >= 8) {
11395 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11396 .addReg(AddrOut, RegState::Define)
11397 .addReg(AddrIn)
11398 .addImm(0)
11400 } else if (IsThumb1) {
11401 // load + update AddrIn
11402 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11403 .addReg(AddrIn)
11404 .addImm(0)
11406 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11407 .add(t1CondCodeOp())
11408 .addReg(AddrIn)
11409 .addImm(LdSize)
11411 } else if (IsThumb2) {
11412 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11413 .addReg(AddrOut, RegState::Define)
11414 .addReg(AddrIn)
11415 .addImm(LdSize)
11417 } else { // arm
11418 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11419 .addReg(AddrOut, RegState::Define)
11420 .addReg(AddrIn)
11421 .addReg(0)
11422 .addImm(LdSize)
11424 }
11425}
11426
11427/// Emit a post-increment store operation with given size. The instructions
11428/// will be added to BB at Pos.
11429static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11430 const TargetInstrInfo *TII, const DebugLoc &dl,
11431 unsigned StSize, unsigned Data, unsigned AddrIn,
11432 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11433 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11434 assert(StOpc != 0 && "Should have a store opcode");
11435 if (StSize >= 8) {
11436 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11437 .addReg(AddrIn)
11438 .addImm(0)
11439 .addReg(Data)
11441 } else if (IsThumb1) {
11442 // store + update AddrIn
11443 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11444 .addReg(Data)
11445 .addReg(AddrIn)
11446 .addImm(0)
11448 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11449 .add(t1CondCodeOp())
11450 .addReg(AddrIn)
11451 .addImm(StSize)
11453 } else if (IsThumb2) {
11454 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11455 .addReg(Data)
11456 .addReg(AddrIn)
11457 .addImm(StSize)
11459 } else { // arm
11460 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11461 .addReg(Data)
11462 .addReg(AddrIn)
11463 .addReg(0)
11464 .addImm(StSize)
11466 }
11467}
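Both helpers emit the classic post-increment copy idiom used by the struct-byval expansion below: load a unit and bump the source pointer, store the unit and bump the destination pointer. One iteration of the residual byte loop, rendered in plain C++ (a model, not the emitted code):

// Equivalent of one [scratch, srcOut] = LDRB_POST(srcIn, 1);
//                   [destOut]         = STRB_POST(scratch, destIn, 1) pair.
static void copy_byte_post_inc(const unsigned char *&Src, unsigned char *&Dst) {
  unsigned char Scratch = *Src++;  // post-increment load
  *Dst++ = Scratch;                // post-increment store
}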
11468
11470ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11471 MachineBasicBlock *BB) const {
11472 // This pseudo instruction has 4 operands: dst, src, size, alignment.
11473 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11474 // Otherwise, we will generate unrolled scalar copies.
11475 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11476 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11478
11479 Register dest = MI.getOperand(0).getReg();
11480 Register src = MI.getOperand(1).getReg();
11481 unsigned SizeVal = MI.getOperand(2).getImm();
11482 unsigned Alignment = MI.getOperand(3).getImm();
11483 DebugLoc dl = MI.getDebugLoc();
11484
11485 MachineFunction *MF = BB->getParent();
11487 unsigned UnitSize = 0;
11488 const TargetRegisterClass *TRC = nullptr;
11489 const TargetRegisterClass *VecTRC = nullptr;
11490
11491 bool IsThumb1 = Subtarget->isThumb1Only();
11492 bool IsThumb2 = Subtarget->isThumb2();
11493 bool IsThumb = Subtarget->isThumb();
11494
11495 if (Alignment & 1) {
11496 UnitSize = 1;
11497 } else if (Alignment & 2) {
11498 UnitSize = 2;
11499 } else {
11500 // Check whether we can use NEON instructions.
11501 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11502 Subtarget->hasNEON()) {
11503 if ((Alignment % 16 == 0) && SizeVal >= 16)
11504 UnitSize = 16;
11505 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11506 UnitSize = 8;
11507 }
11508 // Can't use NEON instructions.
11509 if (UnitSize == 0)
11510 UnitSize = 4;
11511 }
11512
11513 // Select the correct opcode and register class for unit size load/store
11514 bool IsNeon = UnitSize >= 8;
11515 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11516 if (IsNeon)
11517 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11518 : UnitSize == 8 ? &ARM::DPRRegClass
11519 : nullptr;
11520
11521 unsigned BytesLeft = SizeVal % UnitSize;
11522 unsigned LoopSize = SizeVal - BytesLeft;
11523
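// Worked example: for SizeVal == 37 with 4-byte alignment, UnitSize is 4,
// so LoopSize == 36 is copied as nine 4-byte post-incremented load/store
// pairs and the remaining BytesLeft == 1 byte is copied with LDRB/STRB.
// With 16-byte alignment and NEON available, UnitSize would be 16 instead
// (LoopSize == 32, BytesLeft == 5).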
11524 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11525 // Use LDR and STR to copy.
11526 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11527 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11528 unsigned srcIn = src;
11529 unsigned destIn = dest;
11530 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11531 Register srcOut = MRI.createVirtualRegister(TRC);
11532 Register destOut = MRI.createVirtualRegister(TRC);
11533 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11534 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11535 IsThumb1, IsThumb2);
11536 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11537 IsThumb1, IsThumb2);
11538 srcIn = srcOut;
11539 destIn = destOut;
11540 }
11541
11542 // Handle the leftover bytes with LDRB and STRB.
11543 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11544 // [destOut] = STRB_POST(scratch, destIn, 1)
11545 for (unsigned i = 0; i < BytesLeft; i++) {
11546 Register srcOut = MRI.createVirtualRegister(TRC);
11547 Register destOut = MRI.createVirtualRegister(TRC);
11548 Register scratch = MRI.createVirtualRegister(TRC);
11549 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11550 IsThumb1, IsThumb2);
11551 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11552 IsThumb1, IsThumb2);
11553 srcIn = srcOut;
11554 destIn = destOut;
11555 }
11556 MI.eraseFromParent(); // The instruction is gone now.
11557 return BB;
11558 }
11559
11560 // Expand the pseudo op to a loop.
11561 // thisMBB:
11562 // ...
11563 // movw varEnd, # --> with thumb2
11564 // movt varEnd, #
11565 // ldrcp varEnd, idx --> without thumb2
11566 // fallthrough --> loopMBB
11567 // loopMBB:
11568 // PHI varPhi, varEnd, varLoop
11569 // PHI srcPhi, src, srcLoop
11570 // PHI destPhi, dst, destLoop
11571 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11572 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11573 // subs varLoop, varPhi, #UnitSize
11574 // bne loopMBB
11575 // fallthrough --> exitMBB
11576 // exitMBB:
11577 // epilogue to handle left-over bytes
11578 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11579 // [destOut] = STRB_POST(scratch, destLoop, 1)
11580 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11581 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11582 MF->insert(It, loopMBB);
11583 MF->insert(It, exitMBB);
11584
11585 // Set the call frame size on entry to the new basic blocks.
11586 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11587 loopMBB->setCallFrameSize(CallFrameSize);
11588 exitMBB->setCallFrameSize(CallFrameSize);
11589
11590 // Transfer the remainder of BB and its successor edges to exitMBB.
11591 exitMBB->splice(exitMBB->begin(), BB,
11592 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11594
11595 // Load an immediate to varEnd.
11596 Register varEnd = MRI.createVirtualRegister(TRC);
11597 if (Subtarget->useMovt()) {
11598 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11599 varEnd)
11600 .addImm(LoopSize);
11601 } else if (Subtarget->genExecuteOnly()) {
11602 assert(IsThumb && "Non-thumb expected to have used movt");
11603 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11604 } else {
11606 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11607 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11608
11609 // MachineConstantPool wants an explicit alignment.
11610 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11611 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11612 MachineMemOperand *CPMMO =
11615
11616 if (IsThumb)
11617 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11618 .addReg(varEnd, RegState::Define)
11621 .addMemOperand(CPMMO);
11622 else
11623 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11624 .addReg(varEnd, RegState::Define)
11626 .addImm(0)
11628 .addMemOperand(CPMMO);
11629 }
11630 BB->addSuccessor(loopMBB);
11631
11632 // Generate the loop body:
11633 // varPhi = PHI(varLoop, varEnd)
11634 // srcPhi = PHI(srcLoop, src)
11635 // destPhi = PHI(destLoop, dst)
11636 MachineBasicBlock *entryBB = BB;
11637 BB = loopMBB;
11638 Register varLoop = MRI.createVirtualRegister(TRC);
11639 Register varPhi = MRI.createVirtualRegister(TRC);
11640 Register srcLoop = MRI.createVirtualRegister(TRC);
11641 Register srcPhi = MRI.createVirtualRegister(TRC);
11642 Register destLoop = MRI.createVirtualRegister(TRC);
11643 Register destPhi = MRI.createVirtualRegister(TRC);
11644
11645 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11646 .addReg(varLoop).addMBB(loopMBB)
11647 .addReg(varEnd).addMBB(entryBB);
11648 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11649 .addReg(srcLoop).addMBB(loopMBB)
11650 .addReg(src).addMBB(entryBB);
11651 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11652 .addReg(destLoop).addMBB(loopMBB)
11653 .addReg(dest).addMBB(entryBB);
11654
11655 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11656 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11657 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11658 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11659 IsThumb1, IsThumb2);
11660 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11661 IsThumb1, IsThumb2);
11662
11663 // Decrement loop variable by UnitSize.
11664 if (IsThumb1) {
11665 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11666 .add(t1CondCodeOp())
11667 .addReg(varPhi)
11668 .addImm(UnitSize)
11670 } else {
11672 BuildMI(*BB, BB->end(), dl,
11673 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11674 MIB.addReg(varPhi)
11675 .addImm(UnitSize)
11677 .add(condCodeOp());
11678 MIB->getOperand(5).setReg(ARM::CPSR);
11679 MIB->getOperand(5).setIsDef(true);
11680 }
11681 BuildMI(*BB, BB->end(), dl,
11682 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11683 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11684
11685 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11686 BB->addSuccessor(loopMBB);
11687 BB->addSuccessor(exitMBB);
11688
11689 // Add epilogue to handle BytesLeft.
11690 BB = exitMBB;
11691 auto StartOfExit = exitMBB->begin();
11692
11693 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11694 // [destOut] = STRB_POST(scratch, destLoop, 1)
11695 unsigned srcIn = srcLoop;
11696 unsigned destIn = destLoop;
11697 for (unsigned i = 0; i < BytesLeft; i++) {
11698 Register srcOut = MRI.createVirtualRegister(TRC);
11699 Register destOut = MRI.createVirtualRegister(TRC);
11700 Register scratch = MRI.createVirtualRegister(TRC);
11701 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11702 IsThumb1, IsThumb2);
11703 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11704 IsThumb1, IsThumb2);
11705 srcIn = srcOut;
11706 destIn = destOut;
11707 }
11708
11709 MI.eraseFromParent(); // The instruction is gone now.
11710 return BB;
11711}
11712
11714ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11715 MachineBasicBlock *MBB) const {
11717 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11718 DebugLoc DL = MI.getDebugLoc();
11719
11720 assert(Subtarget->isTargetWindows() &&
11721 "__chkstk is only supported on Windows");
11722 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11723
11724 // __chkstk takes the number of words to allocate on the stack in R4, and
11725 // returns the stack adjustment in number of bytes in R4. This will not
11726 // clobber any other registers (other than the obvious lr).
11727 //
11728 // Although, technically, IP should be considered a register which may be
11729 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11730 // thumb-2 environment, so there is no interworking required. As a result, we
11731 // do not expect a veneer to be emitted by the linker, clobbering IP.
11732 //
11733 // Each module receives its own copy of __chkstk, so no import thunk is
11734 // required, again, ensuring that IP is not clobbered.
11735 //
11736 // Finally, although some linkers may theoretically provide a trampoline for
11737 // out of range calls (which is quite common due to a 32M range limitation of
11738 // branches for Thumb), we can generate the long-call version via
11739 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11740 // IP.
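// As an illustration, extending the stack by 4096 bytes means the
// surrounding lowering places 4096 / 4 == 1024 (the word count) in R4;
// the call emitted below then probes the guard pages and leaves the byte
// adjustment back in R4, so the final sequence is roughly:
//   bl    __chkstk
//   sub.w sp, sp, r4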
11741
11742 switch (TM.getCodeModel()) {
11743 case CodeModel::Tiny:
11744 llvm_unreachable("Tiny code model not available on ARM.");
11745 case CodeModel::Small:
11746 case CodeModel::Medium:
11747 case CodeModel::Kernel:
11748 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11750 .addExternalSymbol("__chkstk")
11753 .addReg(ARM::R12,
11755 .addReg(ARM::CPSR,
11757 break;
11758 case CodeModel::Large: {
11760 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11761
11762 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11763 .addExternalSymbol("__chkstk");
11766 .addReg(Reg, RegState::Kill)
11769 .addReg(ARM::R12,
11771 .addReg(ARM::CPSR,
11773 break;
11774 }
11775 }
11776
11777 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11778 .addReg(ARM::SP, RegState::Kill)
11779 .addReg(ARM::R4, RegState::Kill)
11782 .add(condCodeOp());
11783
11784 MI.eraseFromParent();
11785 return MBB;
11786}
11787
11789ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11790 MachineBasicBlock *MBB) const {
11791 DebugLoc DL = MI.getDebugLoc();
11792 MachineFunction *MF = MBB->getParent();
11793 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11794
11796 MF->insert(++MBB->getIterator(), ContBB);
11797 ContBB->splice(ContBB->begin(), MBB,
11798 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11800 MBB->addSuccessor(ContBB);
11801
11803 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11804 MF->push_back(TrapBB);
11805 MBB->addSuccessor(TrapBB);
11806
11807 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11808 .addReg(MI.getOperand(0).getReg())
11809 .addImm(0)
11811 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11812 .addMBB(TrapBB)
11814 .addReg(ARM::CPSR);
11815
11816 MI.eraseFromParent();
11817 return ContBB;
11818}
11819
11820// The CPSR operand of SelectItr might be missing a kill marker
11821// because there were multiple uses of CPSR, and ISel didn't know
11822// which to mark. Figure out whether SelectItr should have had a
11823// kill marker, and set it if it should. Returns the correct kill
11824// marker value.
11827 const TargetRegisterInfo* TRI) {
11828 // Scan forward through BB for a use/def of CPSR.
11829 MachineBasicBlock::iterator miI(std::next(SelectItr));
11830 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11831 const MachineInstr& mi = *miI;
11832 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11833 return false;
11834 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11835 break; // Should have kill-flag - update below.
11836 }
11837
11838 // If we hit the end of the block, check whether CPSR is live into a
11839 // successor.
11840 if (miI == BB->end()) {
11841 for (MachineBasicBlock *Succ : BB->successors())
11842 if (Succ->isLiveIn(ARM::CPSR))
11843 return false;
11844 }
11845
11846 // We found a def, or hit the end of the basic block and CPSR wasn't live
11847 // out. SelectMI should have a kill flag on CPSR.
11848 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11849 return true;
11850}
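// Example scenario: two selects lowered from the same compare both read
// CPSR within one block:
//   tCMPi8 %a, 0                 ; defines CPSR
//   %x = tMOVCCr_pseudo ...      ; reads CPSR, no kill flag from ISel
//   %y = tMOVCCr_pseudo ...      ; also reads CPSR
// For the first pseudo the scan above finds a later read, so the function
// returns false and the caller marks CPSR live into the new blocks; for
// the second it finds no further use or def, so the kill flag is added.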
11851
11852/// Adds logic in loop entry MBB to calculate loop iteration count and adds
11853/// t2WhileLoopSetup and t2WhileLoopStart to generate a WLS loop.
11855 MachineBasicBlock *TpLoopBody,
11856 MachineBasicBlock *TpExit, Register OpSizeReg,
11857 const TargetInstrInfo *TII, DebugLoc Dl,
11859 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
11860 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11861 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11862 .addUse(OpSizeReg)
11863 .addImm(15)
11865 .addReg(0);
11866
11867 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11868 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11869 .addUse(AddDestReg, RegState::Kill)
11870 .addImm(4)
11872 .addReg(0);
11873
11874 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11875 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11876 .addUse(LsrDestReg, RegState::Kill);
11877
11878 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11879 .addUse(TotalIterationsReg)
11880 .addMBB(TpExit);
11881
11882 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11883 .addMBB(TpLoopBody)
11885
11886 return TotalIterationsReg;
11887}
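// Worked example: for an element count of n == 100 the entry block
// computes (100 + 15) >> 4 == 7 iterations, and the t2WhileLoopStart
// emitted above branches directly to TpExit when that count is zero
// (i.e. when n == 0), skipping the loop body entirely.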
11888
11889/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11890/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11891/// loops.
11892static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11893 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11894 const TargetInstrInfo *TII, DebugLoc Dl,
11895 MachineRegisterInfo &MRI, Register OpSrcReg,
11896 Register OpDestReg, Register ElementCountReg,
11897 Register TotalIterationsReg, bool IsMemcpy) {
11898 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11899 // array, loop iteration counter, predication counter.
11900
11901 Register SrcPhiReg, CurrSrcReg;
11902 if (IsMemcpy) {
11903 // Current position in the src array
11904 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11905 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11906 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11907 .addUse(OpSrcReg)
11908 .addMBB(TpEntry)
11909 .addUse(CurrSrcReg)
11910 .addMBB(TpLoopBody);
11911 }
11912
11913 // Current position in the dest array
11914 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11915 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11916 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11917 .addUse(OpDestReg)
11918 .addMBB(TpEntry)
11919 .addUse(CurrDestReg)
11920 .addMBB(TpLoopBody);
11921
11922 // Current loop counter
11923 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11924 Register RemainingLoopIterationsReg =
11925 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11926 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11927 .addUse(TotalIterationsReg)
11928 .addMBB(TpEntry)
11929 .addUse(RemainingLoopIterationsReg)
11930 .addMBB(TpLoopBody);
11931
11932 // Predication counter
11933 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11934 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11935 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11936 .addUse(ElementCountReg)
11937 .addMBB(TpEntry)
11938 .addUse(RemainingElementsReg)
11939 .addMBB(TpLoopBody);
11940
11941 // Pass predication counter to VCTP
11942 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11943 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11944 .addUse(PredCounterPhiReg)
11946 .addReg(0)
11947 .addReg(0);
11948
11949 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11950 .addUse(PredCounterPhiReg)
11951 .addImm(16)
11953 .addReg(0);
11954
11955 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11956 Register SrcValueReg;
11957 if (IsMemcpy) {
11958 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11959 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11960 .addDef(CurrSrcReg)
11961 .addDef(SrcValueReg)
11962 .addReg(SrcPhiReg)
11963 .addImm(16)
11965 .addUse(VccrReg)
11966 .addReg(0);
11967 } else
11968 SrcValueReg = OpSrcReg;
11969
11970 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11971 .addDef(CurrDestReg)
11972 .addUse(SrcValueReg)
11973 .addReg(DestPhiReg)
11974 .addImm(16)
11976 .addUse(VccrReg)
11977 .addReg(0);
11978
11979 // Add the pseudoInstrs for decrementing the loop counter and marking the
11980 // end:t2DoLoopDec and t2DoLoopEnd
11981 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11982 .addUse(LoopCounterPhiReg)
11983 .addImm(1);
11984
11985 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11986 .addUse(RemainingLoopIterationsReg)
11987 .addMBB(TpLoopBody);
11988
11989 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11990 .addMBB(TpExit)
11992}
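// Sketch of one iteration of the loop body built above (memcpy case),
// using informal names for the virtual registers:
//   vpr           = MVE_VCTP8 elemsPhi              ; predicate for a partial tail
//   elemsNext     = elemsPhi - 16
//   [q0, srcNext] = MVE_VLDRBU8_post srcPhi, #16    (predicated on vpr)
//   [dstNext]     = MVE_VSTRBU8_post q0, dstPhi, #16 (predicated on vpr)
//   itersNext     = t2LoopDec itersPhi, 1
//   t2LoopEnd itersNext, TpLoopBody                 ; loop back while iterations remain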
11993
11996 MachineBasicBlock *BB) const {
11997 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11998 DebugLoc dl = MI.getDebugLoc();
11999 bool isThumb2 = Subtarget->isThumb2();
12000 switch (MI.getOpcode()) {
12001 default: {
12002 MI.print(errs());
12003 llvm_unreachable("Unexpected instr type to insert");
12004 }
12005
12006 // Thumb1 post-indexed loads are really just single-register LDMs.
12007 case ARM::tLDR_postidx: {
12008 MachineOperand Def(MI.getOperand(1));
12009 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12010 .add(Def) // Rn_wb
12011 .add(MI.getOperand(2)) // Rn
12012 .add(MI.getOperand(3)) // PredImm
12013 .add(MI.getOperand(4)) // PredReg
12014 .add(MI.getOperand(0)) // Rt
12015 .cloneMemRefs(MI);
12016 MI.eraseFromParent();
12017 return BB;
12018 }
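// For instance, the post-indexed Thumb-1 load
//   ldr r0, [r1], #4
// is emitted here as the equivalent single-register load-multiple
//   ldmia r1!, {r0}
// which performs the same load and base-register write-back.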
12019
12020 case ARM::MVE_MEMCPYLOOPINST:
12021 case ARM::MVE_MEMSETLOOPINST: {
12022
12023 // The transformation below expands the MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST
12024 // pseudo into a Tail Predicated (TP) Loop. It adds the instructions to calculate
12025 // the iteration count = ceil(size_in_bytes / 16) in the TP entry block and
12026 // adds the relevant instructions in the TP loop Body for generation of a
12027 // WLSTP loop.
12028
12029 // Below is relevant portion of the CFG after the transformation.
12030 // The Machine Basic Blocks are shown along with branch conditions (in
12031 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
12032 // portion of the CFG and may not necessarily be the entry/exit of the
12033 // function.
12034
12035 // (Relevant) CFG after transformation:
12036 // TP entry MBB
12037 // |
12038 // |-----------------|
12039 // (n <= 0) (n > 0)
12040 // | |
12041 // | TP loop Body MBB<--|
12042 // | | |
12043 // \ |___________|
12044 // \ /
12045 // TP exit MBB
12046
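// For reference, later MVE passes turn the blocks built here into a
// tail-predicated hardware loop of roughly this shape (memcpy of r2
// bytes from r1 to r0):
//   wlstp.8  lr, r2, .Lexit
// .Lloop:
//   vldrb.u8 q0, [r1], #16
//   vstrb.8  q0, [r0], #16
//   letp     lr, .Lloop
// .Lexit: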
12047 MachineFunction *MF = BB->getParent();
12048 MachineFunctionProperties &Properties = MF->getProperties();
12050
12051 Register OpDestReg = MI.getOperand(0).getReg();
12052 Register OpSrcReg = MI.getOperand(1).getReg();
12053 Register OpSizeReg = MI.getOperand(2).getReg();
12054
12055 // Allocate the required MBBs and add to parent function.
12056 MachineBasicBlock *TpEntry = BB;
12057 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12058 MachineBasicBlock *TpExit;
12059
12060 MF->push_back(TpLoopBody);
12061
12062 // If any instructions are present in the current block after
12063 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12064 // move the instructions into the newly created exit block. If there are no
12065 // instructions add an explicit branch to the FallThrough block and then
12066 // split.
12067 //
12068 // The split is required for two reasons:
12069 // 1) A terminator (t2WhileLoopStart) will be placed at that site.
12070 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12071 // need to be updated. splitAt() already handles this.
12072 TpExit = BB->splitAt(MI, false);
12073 if (TpExit == BB) {
12074 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12075 "block containing memcpy/memset Pseudo");
12076 TpExit = BB->getFallThrough();
12077 BuildMI(BB, dl, TII->get(ARM::t2B))
12078 .addMBB(TpExit)
12080 TpExit = BB->splitAt(MI, false);
12081 }
12082
12083 // Add logic for iteration count
12084 Register TotalIterationsReg =
12085 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12086
12087 // Add the vectorized (and predicated) loads/store instructions
12088 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12089 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12090 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12091
12092 // Required to avoid conflict with the MachineVerifier during testing.
12094
12095 // Connect the blocks
12096 TpEntry->addSuccessor(TpLoopBody);
12097 TpLoopBody->addSuccessor(TpLoopBody);
12098 TpLoopBody->addSuccessor(TpExit);
12099
12100 // Reorder for a more natural layout
12101 TpLoopBody->moveAfter(TpEntry);
12102 TpExit->moveAfter(TpLoopBody);
12103
12104 // Finally, remove the memcpy Pseudo Instruction
12105 MI.eraseFromParent();
12106
12107 // Return the exit block as it may contain other instructions requiring a
12108 // custom inserter
12109 return TpExit;
12110 }
12111
12112 // The Thumb2 pre-indexed stores have the same MI operands; they just
12113 // define them differently in the .td files from the isel patterns, so
12114 // they need pseudos.
12115 case ARM::t2STR_preidx:
12116 MI.setDesc(TII->get(ARM::t2STR_PRE));
12117 return BB;
12118 case ARM::t2STRB_preidx:
12119 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12120 return BB;
12121 case ARM::t2STRH_preidx:
12122 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12123 return BB;
12124
12125 case ARM::STRi_preidx:
12126 case ARM::STRBi_preidx: {
12127 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12128 : ARM::STRB_PRE_IMM;
12129 // Decode the offset.
12130 unsigned Offset = MI.getOperand(4).getImm();
12131 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12133 if (isSub)
12134 Offset = -Offset;
12135
12136 MachineMemOperand *MMO = *MI.memoperands_begin();
12137 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12138 .add(MI.getOperand(0)) // Rn_wb
12139 .add(MI.getOperand(1)) // Rt
12140 .add(MI.getOperand(2)) // Rn
12141 .addImm(Offset) // offset (skip GPR==zero_reg)
12142 .add(MI.getOperand(5)) // pred
12143 .add(MI.getOperand(6))
12144 .addMemOperand(MMO);
12145 MI.eraseFromParent();
12146 return BB;
12147 }
12148 case ARM::STRr_preidx:
12149 case ARM::STRBr_preidx:
12150 case ARM::STRH_preidx: {
12151 unsigned NewOpc;
12152 switch (MI.getOpcode()) {
12153 default: llvm_unreachable("unexpected opcode!");
12154 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12155 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12156 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12157 }
12158 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12159 for (const MachineOperand &MO : MI.operands())
12160 MIB.add(MO);
12161 MI.eraseFromParent();
12162 return BB;
12163 }
12164
12165 case ARM::tMOVCCr_pseudo: {
12166 // To "insert" a SELECT_CC instruction, we actually have to insert the
12167 // diamond control-flow pattern. The incoming instruction knows the
12168 // destination vreg to set, the condition code register to branch on, the
12169 // true/false values to select between, and a branch opcode to use.
12170 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12172
12173 // thisMBB:
12174 // ...
12175 // TrueVal = ...
12176 // cmpTY ccX, r1, r2
12177 // bCC copy1MBB
12178 // fallthrough --> copy0MBB
12179 MachineBasicBlock *thisMBB = BB;
12180 MachineFunction *F = BB->getParent();
12181 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12182 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12183 F->insert(It, copy0MBB);
12184 F->insert(It, sinkMBB);
12185
12186 // Set the call frame size on entry to the new basic blocks.
12187 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12188 copy0MBB->setCallFrameSize(CallFrameSize);
12189 sinkMBB->setCallFrameSize(CallFrameSize);
12190
12191 // Check whether CPSR is live past the tMOVCCr_pseudo.
12192 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12193 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12194 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12195 copy0MBB->addLiveIn(ARM::CPSR);
12196 sinkMBB->addLiveIn(ARM::CPSR);
12197 }
12198
12199 // Transfer the remainder of BB and its successor edges to sinkMBB.
12200 sinkMBB->splice(sinkMBB->begin(), BB,
12201 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12203
12204 BB->addSuccessor(copy0MBB);
12205 BB->addSuccessor(sinkMBB);
12206
12207 BuildMI(BB, dl, TII->get(ARM::tBcc))
12208 .addMBB(sinkMBB)
12209 .addImm(MI.getOperand(3).getImm())
12210 .addReg(MI.getOperand(4).getReg());
12211
12212 // copy0MBB:
12213 // %FalseValue = ...
12214 // # fallthrough to sinkMBB
12215 BB = copy0MBB;
12216
12217 // Update machine-CFG edges
12218 BB->addSuccessor(sinkMBB);
12219
12220 // sinkMBB:
12221 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12222 // ...
12223 BB = sinkMBB;
12224 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12225 .addReg(MI.getOperand(1).getReg())
12226 .addMBB(copy0MBB)
12227 .addReg(MI.getOperand(2).getReg())
12228 .addMBB(thisMBB);
12229
12230 MI.eraseFromParent(); // The pseudo instruction is gone now.
12231 return BB;
12232 }
12233
12234 case ARM::BCCi64:
12235 case ARM::BCCZi64: {
12236 // If there is an unconditional branch to the other successor, remove it.
12237 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12238
12239 // Compare both parts that make up the double comparison separately for
12240 // equality.
12241 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12242
12243 Register LHS1 = MI.getOperand(1).getReg();
12244 Register LHS2 = MI.getOperand(2).getReg();
12245 if (RHSisZero) {
12246 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12247 .addReg(LHS1)
12248 .addImm(0)
12250 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12251 .addReg(LHS2).addImm(0)
12252 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12253 } else {
12254 Register RHS1 = MI.getOperand(3).getReg();
12255 Register RHS2 = MI.getOperand(4).getReg();
12256 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12257 .addReg(LHS1)
12258 .addReg(RHS1)
12260 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12261 .addReg(LHS2).addReg(RHS2)
12262 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12263 }
12264
12265 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12266 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12267 if (MI.getOperand(0).getImm() == ARMCC::NE)
12268 std::swap(destMBB, exitMBB);
12269
12270 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12271 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12272 if (isThumb2)
12273 BuildMI(BB, dl, TII->get(ARM::t2B))
12274 .addMBB(exitMBB)
12276 else
12277 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12278
12279 MI.eraseFromParent(); // The pseudo instruction is gone now.
12280 return BB;
12281 }
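// Sketch of the result: BCCi64 testing (LHS1,LHS2) == (RHS1,RHS2) with an
// EQ condition becomes, roughly,
//   cmp   lhs1, rhs1
//   cmpeq lhs2, rhs2        ; second compare predicated on the first being EQ
//   beq   destMBB
//   b     exitMBB
// so the 64-bit equality is decided by two chained 32-bit compares; for an
// NE condition destMBB and exitMBB are swapped above.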
12282
12283 case ARM::Int_eh_sjlj_setjmp:
12284 case ARM::Int_eh_sjlj_setjmp_nofp:
12285 case ARM::tInt_eh_sjlj_setjmp:
12286 case ARM::t2Int_eh_sjlj_setjmp:
12287 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12288 return BB;
12289
12290 case ARM::Int_eh_sjlj_setup_dispatch:
12291 EmitSjLjDispatchBlock(MI, BB);
12292 return BB;
12293
12294 case ARM::ABS:
12295 case ARM::t2ABS: {
12296 // To insert an ABS instruction, we have to insert the
12297 // diamond control-flow pattern. The incoming instruction knows the
12298 // source vreg to test against 0, the destination vreg to set,
12299 // the condition code register to branch on, the
12300 // true/false values to select between, and a branch opcode to use.
12301 // It transforms
12302 // V1 = ABS V0
12303 // into
12304 // V2 = MOVS V0
12305 // BCC (branch to SinkBB if V0 >= 0)
12306 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12307 // SinkBB: V1 = PHI(V2, V3)
12308 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12310 MachineFunction *Fn = BB->getParent();
12311 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12312 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12313 Fn->insert(BBI, RSBBB);
12314 Fn->insert(BBI, SinkBB);
12315
12316 Register ABSSrcReg = MI.getOperand(1).getReg();
12317 Register ABSDstReg = MI.getOperand(0).getReg();
12318 bool ABSSrcKIll = MI.getOperand(1).isKill();
12319 bool isThumb2 = Subtarget->isThumb2();
12321 // In Thumb mode S must not be specified if source register is the SP or
12322 // PC and if destination register is the SP, so restrict register class
12323 Register NewRsbDstReg = MRI.createVirtualRegister(
12324 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12325
12326 // Transfer the remainder of BB and its successor edges to sinkMBB.
12327 SinkBB->splice(SinkBB->begin(), BB,
12328 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12330
12331 BB->addSuccessor(RSBBB);
12332 BB->addSuccessor(SinkBB);
12333
12334 // fall through to SinkMBB
12335 RSBBB->addSuccessor(SinkBB);
12336
12337 // insert a cmp at the end of BB
12338 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12339 .addReg(ABSSrcReg)
12340 .addImm(0)
12342
12343 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12344 BuildMI(BB, dl,
12345 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12347
12348 // insert rsbri in RSBBB
12349 // Note: BCC and rsbri will be converted into predicated rsbmi
12350 // by if-conversion pass
12351 BuildMI(*RSBBB, RSBBB->begin(), dl,
12352 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12353 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12354 .addImm(0)
12356 .add(condCodeOp());
12357
12358 // insert PHI in SinkBB,
12359 // reuse ABSDstReg to not change uses of ABS instruction
12360 BuildMI(*SinkBB, SinkBB->begin(), dl,
12361 TII->get(ARM::PHI), ABSDstReg)
12362 .addReg(NewRsbDstReg).addMBB(RSBBB)
12363 .addReg(ABSSrcReg).addMBB(BB);
12364
12365 // remove ABS instruction
12366 MI.eraseFromParent();
12367
12368 // return last added BB
12369 return SinkBB;
12370 }
12371 case ARM::COPY_STRUCT_BYVAL_I32:
12372 ++NumLoopByVals;
12373 return EmitStructByval(MI, BB);
12374 case ARM::WIN__CHKSTK:
12375 return EmitLowered__chkstk(MI, BB);
12376 case ARM::WIN__DBZCHK:
12377 return EmitLowered__dbzchk(MI, BB);
12378 }
12379}
12380
12381/// Attaches vregs to MEMCPY that it will use as scratch registers
12382/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12383/// instead of as a custom inserter because we need the use list from the SDNode.
12384static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12385 MachineInstr &MI, const SDNode *Node) {
12386 bool isThumb1 = Subtarget->isThumb1Only();
12387
12388 DebugLoc DL = MI.getDebugLoc();
12389 MachineFunction *MF = MI.getParent()->getParent();
12391 MachineInstrBuilder MIB(*MF, MI);
12392
12393 // If the new dst/src is unused mark it as dead.
12394 if (!Node->hasAnyUseOfValue(0)) {
12395 MI.getOperand(0).setIsDead(true);
12396 }
12397 if (!Node->hasAnyUseOfValue(1)) {
12398 MI.getOperand(1).setIsDead(true);
12399 }
12400
12401 // The MEMCPY both defines and kills the scratch registers.
12402 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12403 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12404 : &ARM::GPRRegClass);
12406 }
12407}
12408
12410 SDNode *Node) const {
12411 if (MI.getOpcode() == ARM::MEMCPY) {
12412 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12413 return;
12414 }
12415
12416 const MCInstrDesc *MCID = &MI.getDesc();
12417 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12418 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12419 // operand is still set to noreg. If needed, set the optional operand's
12420 // register to CPSR, and remove the redundant implicit def.
12421 //
12422 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12423
12424 // Rename pseudo opcodes.
12425 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12426 unsigned ccOutIdx;
12427 if (NewOpc) {
12428 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12429 MCID = &TII->get(NewOpc);
12430
12431 assert(MCID->getNumOperands() ==
12432 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12433 && "converted opcode should be the same except for cc_out"
12434 " (and, on Thumb1, pred)");
12435
12436 MI.setDesc(*MCID);
12437
12438 // Add the optional cc_out operand
12439 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12440
12441 // On Thumb1, move all input operands to the end, then add the predicate
12442 if (Subtarget->isThumb1Only()) {
12443 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12444 MI.addOperand(MI.getOperand(1));
12445 MI.removeOperand(1);
12446 }
12447
12448 // Restore the ties
12449 for (unsigned i = MI.getNumOperands(); i--;) {
12450 const MachineOperand& op = MI.getOperand(i);
12451 if (op.isReg() && op.isUse()) {
12452 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12453 if (DefIdx != -1)
12454 MI.tieOperands(DefIdx, i);
12455 }
12456 }
12457
12459 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12460 ccOutIdx = 1;
12461 } else
12462 ccOutIdx = MCID->getNumOperands() - 1;
12463 } else
12464 ccOutIdx = MCID->getNumOperands() - 1;
12465
12466 // Any ARM instruction that sets the 's' bit should specify an optional
12467 // "cc_out" operand in the last operand position.
12468 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12469 assert(!NewOpc && "Optional cc_out operand required");
12470 return;
12471 }
12472 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12473 // since we already have an optional CPSR def.
12474 bool definesCPSR = false;
12475 bool deadCPSR = false;
12476 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12477 ++i) {
12478 const MachineOperand &MO = MI.getOperand(i);
12479 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12480 definesCPSR = true;
12481 if (MO.isDead())
12482 deadCPSR = true;
12483 MI.removeOperand(i);
12484 break;
12485 }
12486 }
12487 if (!definesCPSR) {
12488 assert(!NewOpc && "Optional cc_out operand required");
12489 return;
12490 }
12491 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12492 if (deadCPSR) {
12493 assert(!MI.getOperand(ccOutIdx).getReg() &&
12494 "expect uninitialized optional cc_out operand");
12495 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12496 if (!Subtarget->isThumb1Only())
12497 return;
12498 }
12499
12500 // If this instruction was defined with an optional CPSR def and its dag node
12501 // had a live implicit CPSR def, then activate the optional CPSR def.
12502 MachineOperand &MO = MI.getOperand(ccOutIdx);
12503 MO.setReg(ARM::CPSR);
12504 MO.setIsDef(true);
12505}
12506
12507//===----------------------------------------------------------------------===//
12508// ARM Optimization Hooks
12509//===----------------------------------------------------------------------===//
12510
12511// Helper function that checks if N is a null or all ones constant.
12512static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12514}
12515
12516// Return true if N is conditionally 0 or all ones.
12517// Detects these expressions where cc is an i1 value:
12518//
12519// (select cc 0, y) [AllOnes=0]
12520// (select cc y, 0) [AllOnes=0]
12521// (zext cc) [AllOnes=0]
12522// (sext cc) [AllOnes=0/1]
12523// (select cc -1, y) [AllOnes=1]
12524// (select cc y, -1) [AllOnes=1]
12525//
12526// Invert is set when N is the null/all ones constant when CC is false.
12527// OtherOp is set to the alternative value of N.
12529 SDValue &CC, bool &Invert,
12530 SDValue &OtherOp,
12531 SelectionDAG &DAG) {
12532 switch (N->getOpcode()) {
12533 default: return false;
12534 case ISD::SELECT: {
12535 CC = N->getOperand(0);
12536 SDValue N1 = N->getOperand(1);
12537 SDValue N2 = N->getOperand(2);
12538 if (isZeroOrAllOnes(N1, AllOnes)) {
12539 Invert = false;
12540 OtherOp = N2;
12541 return true;
12542 }
12543 if (isZeroOrAllOnes(N2, AllOnes)) {
12544 Invert = true;
12545 OtherOp = N1;
12546 return true;
12547 }
12548 return false;
12549 }
12550 case ISD::ZERO_EXTEND:
12551 // (zext cc) can never be the all ones value.
12552 if (AllOnes)
12553 return false;
12554 [[fallthrough]];
12555 case ISD::SIGN_EXTEND: {
12556 SDLoc dl(N);
12557 EVT VT = N->getValueType(0);
12558 CC = N->getOperand(0);
12559 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12560 return false;
12561 Invert = !AllOnes;
12562 if (AllOnes)
12563 // When looking for an AllOnes constant, N is an sext, and the 'other'
12564 // value is 0.
12565 OtherOp = DAG.getConstant(0, dl, VT);
12566 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12567 // When looking for a 0 constant, N can be zext or sext.
12568 OtherOp = DAG.getConstant(1, dl, VT);
12569 else
12570 OtherOp = DAG.getAllOnesConstant(dl, VT);
12571 return true;
12572 }
12573 }
12574}
12575
12576// Combine a constant select operand into its use:
12577//
12578// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12579// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12580// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12581// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12582// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12583//
12584// The transform is rejected if the select doesn't have a constant operand that
12585// is null, or all ones when AllOnes is set.
12586//
12587// Also recognize sext/zext from i1:
12588//
12589// (add (zext cc), x) -> (select cc (add x, 1), x)
12590// (add (sext cc), x) -> (select cc (add x, -1), x)
12591//
12592// These transformations eventually create predicated instructions.
12593//
12594// @param N The node to transform.
12595// @param Slct The N operand that is a select.
12596// @param OtherOp The other N operand (x above).
12597// @param DCI Context.
12598// @param AllOnes Require the select constant to be all ones instead of null.
12599// @returns The new node, or SDValue() on failure.
12600static
12603 bool AllOnes = false) {
12604 SelectionDAG &DAG = DCI.DAG;
12605 EVT VT = N->getValueType(0);
12606 SDValue NonConstantVal;
12607 SDValue CCOp;
12608 bool SwapSelectOps;
12609 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12610 NonConstantVal, DAG))
12611 return SDValue();
12612
12613 // Slct is now known to be the desired identity constant when CC is true.
12614 SDValue TrueVal = OtherOp;
12615 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12616 OtherOp, NonConstantVal);
12617 // Unless SwapSelectOps says CC should be false.
12618 if (SwapSelectOps)
12619 std::swap(TrueVal, FalseVal);
12620
12621 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12622 CCOp, TrueVal, FalseVal);
12623}
12624
12625// Attempt combineSelectAndUse on each operand of a commutative operator N.
12626static
12629 SDValue N0 = N->getOperand(0);
12630 SDValue N1 = N->getOperand(1);
12631 if (N0.getNode()->hasOneUse())
12632 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12633 return Result;
12634 if (N1.getNode()->hasOneUse())
12635 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12636 return Result;
12637 return SDValue();
12638}
12639
12641 // VUZP shuffle node.
12642 if (N->getOpcode() == ARMISD::VUZP)
12643 return true;
12644
12645 // "VUZP" on i32 is an alias for VTRN.
12646 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12647 return true;
12648
12649 return false;
12650}
12651
12654 const ARMSubtarget *Subtarget) {
12655 // Look for ADD(VUZP.0, VUZP.1).
12656 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12657 N0 == N1)
12658 return SDValue();
12659
12660 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12661 if (!N->getValueType(0).is64BitVector())
12662 return SDValue();
12663
12664 // Generate vpadd.
12665 SelectionDAG &DAG = DCI.DAG;
12666 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12667 SDLoc dl(N);
12668 SDNode *Unzip = N0.getNode();
12669 EVT VT = N->getValueType(0);
12670
12672 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12673 TLI.getPointerTy(DAG.getDataLayout())));
12674 Ops.push_back(Unzip->getOperand(0));
12675 Ops.push_back(Unzip->getOperand(1));
12676
12677 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12678}
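// Worked example: for v4i16 inputs A = <a0,a1,a2,a3> and B = <b0,b1,b2,b3>,
// VUZP.0 = <a0,a2,b0,b2> and VUZP.1 = <a1,a3,b1,b3>, so
// ADD(VUZP.0, VUZP.1) = <a0+a1, a2+a3, b0+b1, b2+b3>, which is exactly
// vpadd.i16 A, B; the combine above therefore rewrites the ADD into the
// arm_neon_vpadd intrinsic on the VUZP's original operands.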
12679
12682 const ARMSubtarget *Subtarget) {
12683 // Check for two extended operands.
12684 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12685 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12686 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12687 N1.getOpcode() == ISD::ZERO_EXTEND))
12688 return SDValue();
12689
12690 SDValue N00 = N0.getOperand(0);
12691 SDValue N10 = N1.getOperand(0);
12692
12693 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12694 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12695 N00 == N10)
12696 return SDValue();
12697
12698 // We only recognize Q register paddl here; this can't be reached until
12699 // after type legalization.
12700 if (!N00.getValueType().is64BitVector() ||
12702 return SDValue();
12703
12704 // Generate vpaddl.
12705 SelectionDAG &DAG = DCI.DAG;
12706 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12707 SDLoc dl(N);
12708 EVT VT = N->getValueType(0);
12709
12711 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12712 unsigned Opcode;
12713 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12714 Opcode = Intrinsic::arm_neon_vpaddls;
12715 else
12716 Opcode = Intrinsic::arm_neon_vpaddlu;
12717 Ops.push_back(DAG.getConstant(Opcode, dl,
12718 TLI.getPointerTy(DAG.getDataLayout())));
12719 EVT ElemTy = N00.getValueType().getVectorElementType();
12720 unsigned NumElts = VT.getVectorNumElements();
12721 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12722 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12723 N00.getOperand(0), N00.getOperand(1));
12724 Ops.push_back(Concat);
12725
12726 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12727}
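// Similarly, as a sketch: when the two VUZP halves are sign- or
// zero-extended before the ADD, the result is the widened pairwise sum of
// the concatenated input, i.e. vpaddl.sN or vpaddl.uN applied to the
// CONCAT of the VUZP's original operands, which is what is built above.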
12728
12729// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12730// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12731// much easier to match.
12732static SDValue
12735 const ARMSubtarget *Subtarget) {
12736 // Only perform this optimization after legalization, and only if NEON is
12737 // available. We also expect both operands to be BUILD_VECTORs.
12738 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12739 || N0.getOpcode() != ISD::BUILD_VECTOR
12740 || N1.getOpcode() != ISD::BUILD_VECTOR)
12741 return SDValue();
12742
12743 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12744 EVT VT = N->getValueType(0);
12745 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12746 return SDValue();
12747
12748 // Check that the vector operands are of the right form.
12749 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12750 // operands, where N is the size of the formed vector.
12751 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12752 // index such that we have a pairwise add pattern.
12753
12754 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12756 return SDValue();
12757 SDValue Vec = N0->getOperand(0)->getOperand(0);
12758 SDNode *V = Vec.getNode();
12759 unsigned nextIndex = 0;
12760
12761 // For each operands to the ADD which are BUILD_VECTORs,
12762 // check to see if each of their operands are an EXTRACT_VECTOR with
12763 // the same vector and appropriate index.
12764 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12767
12768 SDValue ExtVec0 = N0->getOperand(i);
12769 SDValue ExtVec1 = N1->getOperand(i);
12770
12771 // First operand is the vector, verify its the same.
12772 if (V != ExtVec0->getOperand(0).getNode() ||
12773 V != ExtVec1->getOperand(0).getNode())
12774 return SDValue();
12775
12776 // Second is the constant, verify its correct.
12777 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12778 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12779
12780 // For the constant, we want to see all the even or all the odd.
12781 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12782 || C1->getZExtValue() != nextIndex+1)
12783 return SDValue();
12784
12785 // Increment index.
12786 nextIndex+=2;
12787 } else
12788 return SDValue();
12789 }
12790
12791 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12792 // we're using the entire input vector, otherwise there's a size/legality
12793 // mismatch somewhere.
12794 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12796 return SDValue();
12797
12798 // Create VPADDL node.
12799 SelectionDAG &DAG = DCI.DAG;
12800 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12801
12802 SDLoc dl(N);
12803
12804 // Build operand list.
12806 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12807 TLI.getPointerTy(DAG.getDataLayout())));
12808
12809 // Input is the vector.
12810 Ops.push_back(Vec);
12811
12812 // Get widened type and narrowed type.
12813 MVT widenType;
12814 unsigned numElem = VT.getVectorNumElements();
12815
12816 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12817 switch (inputLaneType.getSimpleVT().SimpleTy) {
12818 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12819 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12820 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12821 default:
12822 llvm_unreachable("Invalid vector element type for padd optimization.");
12823 }
12824
12825 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12826 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12827 return DAG.getNode(ExtOp, dl, VT, tmp);
12828}
12829
12831 if (V->getOpcode() == ISD::UMUL_LOHI ||
12832 V->getOpcode() == ISD::SMUL_LOHI)
12833 return V;
12834 return SDValue();
12835}
12836
12837static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12839 const ARMSubtarget *Subtarget) {
12840 if (!Subtarget->hasBaseDSP())
12841 return SDValue();
12842
12843 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12844 // accumulates the product into a 64-bit value. The 16-bit values will
12845 // be sign extended somehow or SRA'd into 32-bit values
12846 // (addc (adde (mul 16bit, 16bit), lo), hi)
12847 SDValue Mul = AddcNode->getOperand(0);
12848 SDValue Lo = AddcNode->getOperand(1);
12849 if (Mul.getOpcode() != ISD::MUL) {
12850 Lo = AddcNode->getOperand(0);
12851 Mul = AddcNode->getOperand(1);
12852 if (Mul.getOpcode() != ISD::MUL)
12853 return SDValue();
12854 }
12855
12856 SDValue SRA = AddeNode->getOperand(0);
12857 SDValue Hi = AddeNode->getOperand(1);
12858 if (SRA.getOpcode() != ISD::SRA) {
12859 SRA = AddeNode->getOperand(1);
12860 Hi = AddeNode->getOperand(0);
12861 if (SRA.getOpcode() != ISD::SRA)
12862 return SDValue();
12863 }
12864 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12865 if (Const->getZExtValue() != 31)
12866 return SDValue();
12867 } else
12868 return SDValue();
12869
12870 if (SRA.getOperand(0) != Mul)
12871 return SDValue();
12872
12873 SelectionDAG &DAG = DCI.DAG;
12874 SDLoc dl(AddcNode);
12875 unsigned Opcode = 0;
12876 SDValue Op0;
12877 SDValue Op1;
12878
12879 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12880 Opcode = ARMISD::SMLALBB;
12881 Op0 = Mul.getOperand(0);
12882 Op1 = Mul.getOperand(1);
12883 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12884 Opcode = ARMISD::SMLALBT;
12885 Op0 = Mul.getOperand(0);
12886 Op1 = Mul.getOperand(1).getOperand(0);
12887 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12888 Opcode = ARMISD::SMLALTB;
12889 Op0 = Mul.getOperand(0).getOperand(0);
12890 Op1 = Mul.getOperand(1);
12891 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12892 Opcode = ARMISD::SMLALTT;
12893 Op0 = Mul->getOperand(0).getOperand(0);
12894 Op1 = Mul->getOperand(1).getOperand(0);
12895 }
12896
12897 if (!Op0 || !Op1)
12898 return SDValue();
12899
12900 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12901 Op0, Op1, Lo, Hi);
12902 // Replace the ADDs' nodes uses by the MLA node's values.
12903 SDValue HiMLALResult(SMLAL.getNode(), 1);
12904 SDValue LoMLALResult(SMLAL.getNode(), 0);
12905
12906 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12907 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12908
12909 // Return original node to notify the driver to stop replacing.
12910 SDValue resNode(AddcNode, 0);
12911 return resNode;
12912}
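// Illustrative example: with a and b both known to be 16-bit
// (sign-extended) values, the pattern matched above accumulates the
// sign-extended 32-bit product a * b into the 64-bit pair {Hi, Lo}; it is
// rewritten to ARMISD::SMLALBB, which selects to "smlalbb lo, hi, a, b".
// The BT/TB/TT variants are chosen when one or both multiplicands are
// arithmetic shifts right by 16, i.e. the top halves of their registers.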
12913
12916 const ARMSubtarget *Subtarget) {
12917 // Look for multiply add opportunities.
12918 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
12919 // each add node consumes a value from ISD::UMUL_LOHI and there is
12920 // a glue link from the first add to the second add.
12921 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12922 // a S/UMLAL instruction.
12923 // UMUL_LOHI
12924 // / :lo \ :hi
12925 // V \ [no multiline comment]
12926 // loAdd -> ADDC |
12927 // \ :carry /
12928 // V V
12929 // ADDE <- hiAdd
12930 //
12931 // In the special case where only the higher part of a signed result is used
12932 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12933 // a constant with the exact value of 0x80000000, we recognize we are dealing
12934 // with a "rounded multiply and add" (or subtract) and transform it into
12935 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
12936
12937 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12938 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12939 "Expect an ADDE or SUBE");
12940
12941 assert(AddeSubeNode->getNumOperands() == 3 &&
12942 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12943 "ADDE node has the wrong inputs");
12944
12945 // Check that we are chained to the right ADDC or SUBC node.
12946 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12947 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12948 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12949 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12950 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12951 return SDValue();
12952
12953 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12954 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12955
12956 // Check if the two operands are from the same mul_lohi node.
12957 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12958 return SDValue();
12959
12960 assert(AddcSubcNode->getNumValues() == 2 &&
12961 AddcSubcNode->getValueType(0) == MVT::i32 &&
12962 "Expect ADDC with two result values. First: i32");
12963
12964 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12965 // may be an SMLAL which multiplies two 16-bit values.
12966 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12967 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12968 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12969 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12970 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12971 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12972
12973 // Check for the triangle shape.
12974 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12975 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12976
12977 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12978 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12979 return SDValue();
12980
12981 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12982 bool IsLeftOperandMUL = false;
12983 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12984 if (MULOp == SDValue())
12985 MULOp = findMUL_LOHI(AddeSubeOp1);
12986 else
12987 IsLeftOperandMUL = true;
12988 if (MULOp == SDValue())
12989 return SDValue();
12990
12991 // Figure out the right opcode.
12992 unsigned Opc = MULOp->getOpcode();
12993 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12994
12995 // Figure out the high and low input values to the MLAL node.
12996 SDValue *HiAddSub = nullptr;
12997 SDValue *LoMul = nullptr;
12998 SDValue *LowAddSub = nullptr;
12999
13000 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
13001 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
13002 return SDValue();
13003
13004 if (IsLeftOperandMUL)
13005 HiAddSub = &AddeSubeOp1;
13006 else
13007 HiAddSub = &AddeSubeOp0;
13008
13009 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
13010 // whose low result is fed to the ADDC/SUBC we are checking.
13011
13012 if (AddcSubcOp0 == MULOp.getValue(0)) {
13013 LoMul = &AddcSubcOp0;
13014 LowAddSub = &AddcSubcOp1;
13015 }
13016 if (AddcSubcOp1 == MULOp.getValue(0)) {
13017 LoMul = &AddcSubcOp1;
13018 LowAddSub = &AddcSubcOp0;
13019 }
13020
13021 if (!LoMul)
13022 return SDValue();
13023
13024 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
13025 // the replacement below will create a cycle.
13026 if (AddcSubcNode == HiAddSub->getNode() ||
13027 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13028 return SDValue();
13029
13030 // Create the merged node.
13031 SelectionDAG &DAG = DCI.DAG;
13032
13033 // Start building operand list.
13035 Ops.push_back(LoMul->getOperand(0));
13036 Ops.push_back(LoMul->getOperand(1));
13037
13038 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13039 // the case, we must be doing signed multiplication and only use the higher
13040 // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
13041 // addition or subtraction with the value 0x80000000.
13042 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13043 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13044 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13045 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13046 0x80000000) {
13047 Ops.push_back(*HiAddSub);
13048 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13049 FinalOpc = ARMISD::SMMLSR;
13050 } else {
13051 FinalOpc = ARMISD::SMMLAR;
13052 }
13053 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13054 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13055
13056 return SDValue(AddeSubeNode, 0);
13057 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13058 // SMMLS is generated during instruction selection and the rest of this
13059 // function can not handle the case where AddcSubcNode is a SUBC.
13060 return SDValue();
13061
13062 // Finish building the operand list for {U/S}MLAL
13063 Ops.push_back(*LowAddSub);
13064 Ops.push_back(*HiAddSub);
13065
13066 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13067 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13068
13069 // Replace the ADDs' nodes uses by the MLA node's values.
13070 SDValue HiMLALResult(MLALNode.getNode(), 1);
13071 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13072
13073 SDValue LoMLALResult(MLALNode.getNode(), 0);
13074 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13075
13076 // Return original node to notify the driver to stop replacing.
13077 return SDValue(AddeSubeNode, 0);
13078}
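// Note (illustrative, editorial): the combine above effectively rewrites the
// expanded form of a 64-bit multiply-accumulate, roughly
//   ADDC(lo(xMUL_LOHI a, b), addlo) feeding ADDE(hi(xMUL_LOHI a, b), addhi)
// into a single ARMISD::SMLAL/UMLAL node producing both result halves, which
// later selects to the smlal/umlal instructions.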
13079
13080static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13081 TargetLowering::DAGCombinerInfo &DCI,
13082 const ARMSubtarget *Subtarget) {
13083 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13084 // While trying to combine for the other MLAL nodes, first search for the
13085 // chance to use UMAAL. Check if Addc uses a node which has already
13086 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13087 // as the addend, and it's handled in PerformUMLALCombine.
13088
13089 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13090 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13091
13092 // Check that we have a glued ADDC node.
13093 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13094 if (AddcNode->getOpcode() != ARMISD::ADDC)
13095 return SDValue();
13096
13097 // Find the converted UMAAL or quit if it doesn't exist.
13098 SDNode *UmlalNode = nullptr;
13099 SDValue AddHi;
13100 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13101 UmlalNode = AddcNode->getOperand(0).getNode();
13102 AddHi = AddcNode->getOperand(1);
13103 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13104 UmlalNode = AddcNode->getOperand(1).getNode();
13105 AddHi = AddcNode->getOperand(0);
13106 } else {
13107 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13108 }
13109
13110 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13111 // the ADDC as well as Zero.
13112 if (!isNullConstant(UmlalNode->getOperand(3)))
13113 return SDValue();
13114
13115 if ((isNullConstant(AddeNode->getOperand(0)) &&
13116 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13117 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13118 isNullConstant(AddeNode->getOperand(1)))) {
13119 SelectionDAG &DAG = DCI.DAG;
13120 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13121 UmlalNode->getOperand(2), AddHi };
13122 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13123 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13124
13125 // Replace the ADD nodes' uses with the UMAAL node's values.
13126 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13127 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13128
13129 // Return original node to notify the driver to stop replacing.
13130 return SDValue(AddeNode, 0);
13131 }
13132 return SDValue();
13133}
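// Note (illustrative, editorial): UMAAL computes RdHi:RdLo = Rn * Rm + RdLo +
// RdHi, i.e. a 32x32->64 multiply plus two independent 32-bit addends, which is
// why the combine above can fold the extra AddHi operand into an existing UMLAL
// whose high accumulator input was zero.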
13134
13135static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13136 const ARMSubtarget *Subtarget) {
13137 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13138 return SDValue();
13139
13140 // Check that we have a pair of ADDC and ADDE as operands.
13141 // Both addends of the ADDE must be zero.
13142 SDNode* AddcNode = N->getOperand(2).getNode();
13143 SDNode* AddeNode = N->getOperand(3).getNode();
13144 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13145 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13146 isNullConstant(AddeNode->getOperand(0)) &&
13147 isNullConstant(AddeNode->getOperand(1)) &&
13148 (AddeNode->getOperand(2).getNode() == AddcNode))
13149 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13150 DAG.getVTList(MVT::i32, MVT::i32),
13151 {N->getOperand(0), N->getOperand(1),
13152 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13153 else
13154 return SDValue();
13155}
13156
13157static SDValue PerformAddcSubcCombine(SDNode *N,
13158 TargetLowering::DAGCombinerInfo &DCI,
13159 const ARMSubtarget *Subtarget) {
13160 SelectionDAG &DAG(DCI.DAG);
13161
13162 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13163 // (SUBC (ADDE 0, 0, C), 1) -> C
13164 SDValue LHS = N->getOperand(0);
13165 SDValue RHS = N->getOperand(1);
13166 if (LHS->getOpcode() == ARMISD::ADDE &&
13167 isNullConstant(LHS->getOperand(0)) &&
13168 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13169 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13170 }
13171 }
13172
13173 if (Subtarget->isThumb1Only()) {
13174 SDValue RHS = N->getOperand(1);
13175 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13176 int32_t imm = C->getSExtValue();
13177 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13178 SDLoc DL(N);
13179 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13180 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13181 : ARMISD::ADDC;
13182 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13183 }
13184 }
13185 }
13186
13187 return SDValue();
13188}
13189
13190static SDValue PerformAddeSubeCombine(SDNode *N,
13191 TargetLowering::DAGCombinerInfo &DCI,
13192 const ARMSubtarget *Subtarget) {
13193 if (Subtarget->isThumb1Only()) {
13194 SelectionDAG &DAG = DCI.DAG;
13195 SDValue RHS = N->getOperand(1);
13196 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13197 int64_t imm = C->getSExtValue();
13198 if (imm < 0) {
13199 SDLoc DL(N);
13200
13201 // The with-carry-in form matches bitwise not instead of the negation.
13202 // Effectively, the inverse interpretation of the carry flag already
13203 // accounts for part of the negation.
13204 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13205
13206 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13207 : ARMISD::ADDE;
13208 return DAG.getNode(Opcode, DL, N->getVTList(),
13209 N->getOperand(0), RHS, N->getOperand(2));
13210 }
13211 }
13212 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13213 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13214 }
13215 return SDValue();
13216}
13217
13218static SDValue PerformSELECTCombine(SDNode *N,
13219 TargetLowering::DAGCombinerInfo &DCI,
13220 const ARMSubtarget *Subtarget) {
13221 if (!Subtarget->hasMVEIntegerOps())
13222 return SDValue();
13223
13224 SDLoc dl(N);
13225 SDValue SetCC;
13226 SDValue LHS;
13227 SDValue RHS;
13228 ISD::CondCode CC;
13229 SDValue TrueVal;
13230 SDValue FalseVal;
13231
13232 if (N->getOpcode() == ISD::SELECT &&
13233 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13234 SetCC = N->getOperand(0);
13235 LHS = SetCC->getOperand(0);
13236 RHS = SetCC->getOperand(1);
13237 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13238 TrueVal = N->getOperand(1);
13239 FalseVal = N->getOperand(2);
13240 } else if (N->getOpcode() == ISD::SELECT_CC) {
13241 LHS = N->getOperand(0);
13242 RHS = N->getOperand(1);
13243 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13244 TrueVal = N->getOperand(2);
13245 FalseVal = N->getOperand(3);
13246 } else {
13247 return SDValue();
13248 }
13249
13250 unsigned int Opcode = 0;
13251 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13252 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13253 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13254 Opcode = ARMISD::VMINVu;
13255 if (CC == ISD::SETUGT)
13256 std::swap(TrueVal, FalseVal);
13257 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13258 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13259 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13260 Opcode = ARMISD::VMINVs;
13261 if (CC == ISD::SETGT)
13262 std::swap(TrueVal, FalseVal);
13263 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13264 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13265 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13266 Opcode = ARMISD::VMAXVu;
13267 if (CC == ISD::SETULT)
13268 std::swap(TrueVal, FalseVal);
13269 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13270 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13271 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13272 Opcode = ARMISD::VMAXVs;
13273 if (CC == ISD::SETLT)
13274 std::swap(TrueVal, FalseVal);
13275 } else
13276 return SDValue();
13277
13278 // Normalise to the right hand side being the vector reduction
13279 switch (TrueVal->getOpcode()) {
13280 case ISD::VECREDUCE_UMIN:
13281 case ISD::VECREDUCE_SMIN:
13282 case ISD::VECREDUCE_UMAX:
13283 case ISD::VECREDUCE_SMAX:
13284 std::swap(LHS, RHS);
13285 std::swap(TrueVal, FalseVal);
13286 break;
13287 }
13288
13289 EVT VectorType = FalseVal->getOperand(0).getValueType();
13290
13291 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13292 VectorType != MVT::v4i32)
13293 return SDValue();
13294
13295 EVT VectorScalarType = VectorType.getVectorElementType();
13296
13297 // The values being selected must also be the ones being compared
13298 if (TrueVal != LHS || FalseVal != RHS)
13299 return SDValue();
13300
13301 EVT LeftType = LHS->getValueType(0);
13302 EVT RightType = RHS->getValueType(0);
13303
13304 // The types must match the reduced type too
13305 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13306 return SDValue();
13307
13308 // Legalise the scalar to an i32
13309 if (VectorScalarType != MVT::i32)
13310 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13311
13312 // Generate the reduction as an i32 for legalisation purposes
13313 auto Reduction =
13314 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13315
13316 // The result isn't actually an i32 so truncate it back to its original type
13317 if (VectorScalarType != MVT::i32)
13318 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13319
13320 return Reduction;
13321}
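// Note (illustrative, editorial): the combine above matches scalar selects of
// the form
//   x < vecreduce_umin(v) ? x : vecreduce_umin(v)
// and maps them onto the MVE across-vector reductions VMINV/VMAXV, which take a
// running scalar and a vector and return the combined minimum/maximum.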
13322
13323// A special combine for the vqdmulh family of instructions. This is one of the
13324// potential set of patterns that could match this instruction. The base pattern
13325// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13326// This matches the variant min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13327// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13328// the max is unnecessary.
13329static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13330 EVT VT = N->getValueType(0);
13331 SDValue Shft;
13332 ConstantSDNode *Clamp;
13333
13334 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13335 return SDValue();
13336
13337 if (N->getOpcode() == ISD::SMIN) {
13338 Shft = N->getOperand(0);
13339 Clamp = isConstOrConstSplat(N->getOperand(1));
13340 } else if (N->getOpcode() == ISD::VSELECT) {
13341 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13342 SDValue Cmp = N->getOperand(0);
13343 if (Cmp.getOpcode() != ISD::SETCC ||
13344 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13345 Cmp.getOperand(0) != N->getOperand(1) ||
13346 Cmp.getOperand(1) != N->getOperand(2))
13347 return SDValue();
13348 Shft = N->getOperand(1);
13349 Clamp = isConstOrConstSplat(N->getOperand(2));
13350 } else
13351 return SDValue();
13352
13353 if (!Clamp)
13354 return SDValue();
13355
13356 MVT ScalarType;
13357 int ShftAmt = 0;
13358 switch (Clamp->getSExtValue()) {
13359 case (1 << 7) - 1:
13360 ScalarType = MVT::i8;
13361 ShftAmt = 7;
13362 break;
13363 case (1 << 15) - 1:
13364 ScalarType = MVT::i16;
13365 ShftAmt = 15;
13366 break;
13367 case (1ULL << 31) - 1:
13368 ScalarType = MVT::i32;
13369 ShftAmt = 31;
13370 break;
13371 default:
13372 return SDValue();
13373 }
13374
13375 if (Shft.getOpcode() != ISD::SRA)
13376 return SDValue();
13377 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13378 if (!N1 || N1->getSExtValue() != ShftAmt)
13379 return SDValue();
13380
13381 SDValue Mul = Shft.getOperand(0);
13382 if (Mul.getOpcode() != ISD::MUL)
13383 return SDValue();
13384
13385 SDValue Ext0 = Mul.getOperand(0);
13386 SDValue Ext1 = Mul.getOperand(1);
13387 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13388 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13389 return SDValue();
13390 EVT VecVT = Ext0.getOperand(0).getValueType();
13391 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13392 return SDValue();
13393 if (Ext1.getOperand(0).getValueType() != VecVT ||
13394 VecVT.getScalarType() != ScalarType ||
13395 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13396 return SDValue();
13397
13398 SDLoc DL(Mul);
13399 unsigned LegalLanes = 128 / (ShftAmt + 1);
13400 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13401 // For types smaller than legal vectors extend to be legal and only use needed
13402 // lanes.
13403 if (VecVT.getSizeInBits() < 128) {
13404 EVT ExtVecVT =
13405 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13406 VecVT.getVectorNumElements());
13407 SDValue Inp0 =
13408 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13409 SDValue Inp1 =
13410 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13411 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13412 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13413 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13414 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13415 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13416 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13417 }
13418
13419 // For larger types, split into legal sized chunks.
13420 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13421 unsigned NumParts = VecVT.getSizeInBits() / 128;
13422 SmallVector<SDValue> Parts;
13423 for (unsigned I = 0; I < NumParts; ++I) {
13424 SDValue Inp0 =
13425 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13426 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13427 SDValue Inp1 =
13428 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13429 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13430 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13431 Parts.push_back(VQDMULH);
13432 }
13433 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13434 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13435}
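// Note (illustrative, editorial): vqdmulh returns the high half of a saturating
// doubling multiply, roughly sat((2 * a * b) >> eltBits); for i16 lanes that is
// (a * b) >> 15 clamped to 32767, which is exactly the smin(ashr(mul(sext,
// sext), 15), 0x7fff) shape matched above.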
13436
13437static SDValue PerformVSELECTCombine(SDNode *N,
13438 TargetLowering::DAGCombinerInfo &DCI,
13439 const ARMSubtarget *Subtarget) {
13440 if (!Subtarget->hasMVEIntegerOps())
13441 return SDValue();
13442
13443 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13444 return V;
13445
13446 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13447 //
13448 // We need to re-implement this optimization here as the implementation in the
13449 // Target-Independent DAGCombiner does not handle the kind of constant we make
13450 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13451 // good reason, allowing truncation there would break other targets).
13452 //
13453 // Currently, this is only done for MVE, as it's the only target that benefits
13454 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13455 if (N->getOperand(0).getOpcode() != ISD::XOR)
13456 return SDValue();
13457 SDValue XOR = N->getOperand(0);
13458
13459 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13460 // It is important to check with truncation allowed as the BUILD_VECTORs we
13461 // generate in those situations will truncate their operands.
13462 ConstantSDNode *Const =
13463 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13464 /*AllowTruncation*/ true);
13465 if (!Const || !Const->isOne())
13466 return SDValue();
13467
13468 // Rewrite into vselect(cond, rhs, lhs).
13469 SDValue Cond = XOR->getOperand(0);
13470 SDValue LHS = N->getOperand(1);
13471 SDValue RHS = N->getOperand(2);
13472 EVT Type = N->getValueType(0);
13473 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13474}
13475
13476// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13477static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13478 TargetLowering::DAGCombinerInfo &DCI,
13479 const ARMSubtarget *Subtarget) {
13480 SDValue Op0 = N->getOperand(0);
13481 SDValue Op1 = N->getOperand(1);
13482 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13483 EVT VT = N->getValueType(0);
13484
13485 if (!Subtarget->hasMVEIntegerOps() ||
13486 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13487 return SDValue();
13488
13489 if (CC == ISD::SETUGE) {
13490 std::swap(Op0, Op1);
13491 CC = ISD::SETULT;
13492 }
13493
13494 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13495 Op0.getOpcode() != ISD::BUILD_VECTOR)
13496 return SDValue();
13497
13498 // Check first operand is BuildVector of 0,1,2,...
13499 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13500 if (!Op0.getOperand(I).isUndef() &&
13501 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13502 Op0.getConstantOperandVal(I) == I))
13503 return SDValue();
13504 }
13505
13506 // The second operand must be a splat; Op1S is the splatted scalar.
13507 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13508 if (!Op1S)
13509 return SDValue();
13510
13511 unsigned Opc;
13512 switch (VT.getVectorNumElements()) {
13513 case 2:
13514 Opc = Intrinsic::arm_mve_vctp64;
13515 break;
13516 case 4:
13517 Opc = Intrinsic::arm_mve_vctp32;
13518 break;
13519 case 8:
13520 Opc = Intrinsic::arm_mve_vctp16;
13521 break;
13522 case 16:
13523 Opc = Intrinsic::arm_mve_vctp8;
13524 break;
13525 default:
13526 return SDValue();
13527 }
13528
13529 SDLoc DL(N);
13530 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13531 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13532 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13533}
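// Note (illustrative, editorial): vctp n produces a predicate with the first n
// lanes set, which is the same mask as comparing the lane indices [0,1,2,...]
// unsigned-less-than splat(n); hence the rewrite above to the arm.mve.vctp*
// intrinsics.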
13534
13535/// PerformADDECombine - Target-specific dag combine transform from
13536/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13537/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13538static SDValue PerformADDECombine(SDNode *N,
13539 TargetLowering::DAGCombinerInfo &DCI,
13540 const ARMSubtarget *Subtarget) {
13541 // Only ARM and Thumb2 support UMLAL/SMLAL.
13542 if (Subtarget->isThumb1Only())
13543 return PerformAddeSubeCombine(N, DCI, Subtarget);
13544
13545 // Only perform the checks after legalize when the pattern is available.
13546 if (DCI.isBeforeLegalize()) return SDValue();
13547
13548 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13549}
13550
13551/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13552/// operands N0 and N1. This is a helper for PerformADDCombine that is
13553/// called with the default operands, and if that fails, with commuted
13554/// operands.
13555static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13556 TargetLowering::DAGCombinerInfo &DCI,
13557 const ARMSubtarget *Subtarget){
13558 // Attempt to create vpadd for this add.
13559 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13560 return Result;
13561
13562 // Attempt to create vpaddl for this add.
13563 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13564 return Result;
13565 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13566 Subtarget))
13567 return Result;
13568
13569 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13570 if (N0.getNode()->hasOneUse())
13571 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13572 return Result;
13573 return SDValue();
13574}
13575
13576static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13577 EVT VT = N->getValueType(0);
13578 SDValue N0 = N->getOperand(0);
13579 SDValue N1 = N->getOperand(1);
13580 SDLoc dl(N);
13581
13582 auto IsVecReduce = [](SDValue Op) {
13583 switch (Op.getOpcode()) {
13584 case ISD::VECREDUCE_ADD:
13585 case ARMISD::VADDVs:
13586 case ARMISD::VADDVu:
13587 case ARMISD::VMLAVs:
13588 case ARMISD::VMLAVu:
13589 return true;
13590 }
13591 return false;
13592 };
13593
13594 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13595 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13596 // add(add(X, vecreduce(Y)), vecreduce(Z))
13597 // to make better use of vaddva style instructions.
13598 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13599 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13600 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13601 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13602 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13603 }
13604 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13605 // add(add(add(A, C), reduce(B)), reduce(D))
13606 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13607 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13608 unsigned N0RedOp = 0;
13609 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13610 N0RedOp = 1;
13611 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13612 return SDValue();
13613 }
13614
13615 unsigned N1RedOp = 0;
13616 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13617 N1RedOp = 1;
13618 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13619 return SDValue();
13620
13621 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13622 N1.getOperand(1 - N1RedOp));
13623 SDValue Add1 =
13624 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13625 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13626 }
13627 return SDValue();
13628 };
13629 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13630 return R;
13631 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13632 return R;
13633
13634 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13635 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13636 // by ascending load offsets. This can help cores prefetch if the order of
13637 // loads is more predictable.
13638 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13639 // Check if two reductions are known to load data where one is before/after
13640 // another. Return negative if N0 loads data before N1, positive if N1 is
13641 // before N0, and 0 if nothing is known.
13642 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13643 // Look through to the first operand of a MUL, for the VMLA case.
13644 // Currently only looks at the first operand, in the hope they are equal.
13645 if (N0.getOpcode() == ISD::MUL)
13646 N0 = N0.getOperand(0);
13647 if (N1.getOpcode() == ISD::MUL)
13648 N1 = N1.getOperand(0);
13649
13650 // Return true if the two operands are loads to the same object and the
13651 // offset of the first is known to be less than the offset of the second.
13652 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13653 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13654 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13655 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13656 Load1->isIndexed())
13657 return 0;
13658
13659 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13660 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13661
13662 if (!BaseLocDecomp0.getBase() ||
13663 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13664 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13665 return 0;
13666 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13667 return -1;
13668 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13669 return 1;
13670 return 0;
13671 };
13672
13673 SDValue X;
13674 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13675 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13676 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13677 N0.getOperand(1).getOperand(0));
13678 if (IsBefore < 0) {
13679 X = N0.getOperand(0);
13680 N0 = N0.getOperand(1);
13681 } else if (IsBefore > 0) {
13682 X = N0.getOperand(1);
13683 N0 = N0.getOperand(0);
13684 } else
13685 return SDValue();
13686 } else if (IsVecReduce(N0.getOperand(0))) {
13687 X = N0.getOperand(1);
13688 N0 = N0.getOperand(0);
13689 } else if (IsVecReduce(N0.getOperand(1))) {
13690 X = N0.getOperand(0);
13691 N0 = N0.getOperand(1);
13692 } else
13693 return SDValue();
13694 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13695 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13696 // Note this is backward to how you would expect. We create
13697 // add(reduce(load + 16), reduce(load + 0)) so that the
13698 // add(reduce(load+16), X) is combined into VADDVA(X, load+16), leaving
13699 // the X as VADDV(load + 0)
13700 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13701 } else
13702 return SDValue();
13703
13704 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13705 return SDValue();
13706
13707 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13708 return SDValue();
13709
13710 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13711 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13712 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13713 };
13714 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13715 return R;
13716 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13717 return R;
13718 return SDValue();
13719}
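// Note (illustrative, editorial): the reassociations above aim to pair each
// vector reduction with a scalar addend, so that add(X, vecreduce(Y)) can
// select to the accumulating MVE form (e.g. vaddva.s32 Rda, Q) instead of a
// separate vaddv plus a scalar add.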
13720
13721static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13722 const ARMSubtarget *Subtarget) {
13723 if (!Subtarget->hasMVEIntegerOps())
13724 return SDValue();
13725
13726 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13727 return R;
13728
13729 EVT VT = N->getValueType(0);
13730 SDValue N0 = N->getOperand(0);
13731 SDValue N1 = N->getOperand(1);
13732 SDLoc dl(N);
13733
13734 if (VT != MVT::i64)
13735 return SDValue();
13736
13737 // We are looking for an i64 add of a VADDLVx. Due to these being i64's, this
13738 // will look like:
13739 // t1: i32,i32 = ARMISD::VADDLVs x
13740 // t2: i64 = build_pair t1, t1:1
13741 // t3: i64 = add t2, y
13742 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13743 // the add to be simplified separately.
13744 // We also need to check for sext / zext and commutative adds.
13745 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13746 SDValue NB) {
13747 if (NB->getOpcode() != ISD::BUILD_PAIR)
13748 return SDValue();
13749 SDValue VecRed = NB->getOperand(0);
13750 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13751 VecRed.getResNo() != 0 ||
13752 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13753 return SDValue();
13754
13755 if (VecRed->getOpcode() == OpcodeA) {
13756 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13757 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13758 VecRed.getOperand(0), VecRed.getOperand(1));
13759 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13760 }
13761
13762 SmallVector<SDValue, 4> Ops(2);
13763 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13764
13765 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13766 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13767 Ops.push_back(VecRed->getOperand(I));
13768 SDValue Red =
13769 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13770 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13771 SDValue(Red.getNode(), 1));
13772 };
13773
13774 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13775 return M;
13776 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13777 return M;
13778 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13779 return M;
13780 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13781 return M;
13782 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13783 return M;
13784 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13785 return M;
13786 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13787 return M;
13788 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13789 return M;
13790 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13791 return M;
13792 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13793 return M;
13794 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13795 return M;
13796 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13797 return M;
13798 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13799 return M;
13800 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13801 return M;
13802 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13803 return M;
13804 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13805 return M;
13806 return SDValue();
13807}
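// Note (illustrative, editorial): VADDLV/VMLALV return a 64-bit reduction as a
// pair of i32 results, so an i64 add of their BUILD_PAIR can be folded into the
// accumulating VADDLVA/VMLALVA forms, roughly
//   add(y, build_pair(VADDLVu v)) -> build_pair(VADDLVAu ylo, yhi, v)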
13808
13809bool
13810ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13811 CombineLevel Level) const {
13812 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13813 N->getOpcode() == ISD::SRL) &&
13814 "Expected shift op");
13815
13816 if (Level == BeforeLegalizeTypes)
13817 return true;
13818
13819 if (N->getOpcode() != ISD::SHL)
13820 return true;
13821
13822 if (Subtarget->isThumb1Only()) {
13823 // Avoid making expensive immediates by commuting shifts. (This logic
13824 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13825 // for free.)
13826 if (N->getOpcode() != ISD::SHL)
13827 return true;
13828 SDValue N1 = N->getOperand(0);
13829 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13830 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13831 return true;
13832 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13833 if (Const->getAPIntValue().ult(256))
13834 return false;
13835 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13836 Const->getAPIntValue().sgt(-256))
13837 return false;
13838 }
13839 return true;
13840 }
13841
13842 // Turn off commute-with-shift transform after legalization, so it doesn't
13843 // conflict with PerformSHLSimplify. (We could try to detect when
13844 // PerformSHLSimplify would trigger more precisely, but it isn't
13845 // really necessary.)
13846 return false;
13847}
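// Note (illustrative, editorial): on Thumb1 this keeps e.g. (add x, 200) << 3
// as written; commuting it to (x << 3) + 1600 would need extra instructions or
// a literal-pool load for 1600, whereas 200 already fits an 8-bit immediate,
// which is what the ult(256)/sgt(-256) checks above test for.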
13848
13849bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13850 const SDNode *N) const {
13851 assert(N->getOpcode() == ISD::XOR &&
13852 (N->getOperand(0).getOpcode() == ISD::SHL ||
13853 N->getOperand(0).getOpcode() == ISD::SRL) &&
13854 "Expected XOR(SHIFT) pattern");
13855
13856 // Only commute if the entire NOT mask is a hidden shifted mask.
13857 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13858 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13859 if (XorC && ShiftC) {
13860 unsigned MaskIdx, MaskLen;
13861 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13862 unsigned ShiftAmt = ShiftC->getZExtValue();
13863 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13864 if (N->getOperand(0).getOpcode() == ISD::SHL)
13865 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13866 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13867 }
13868 }
13869
13870 return false;
13871}
13872
13873bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13874 const SDNode *N, CombineLevel Level) const {
13875 assert(((N->getOpcode() == ISD::SHL &&
13876 N->getOperand(0).getOpcode() == ISD::SRL) ||
13877 (N->getOpcode() == ISD::SRL &&
13878 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13879 "Expected shift-shift mask");
13880
13881 if (!Subtarget->isThumb1Only())
13882 return true;
13883
13884 if (Level == BeforeLegalizeTypes)
13885 return true;
13886
13887 return false;
13888}
13889
13890bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
13891 EVT VT) const {
13892 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13893}
13894
13895bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13896 if (!Subtarget->hasNEON()) {
13897 if (Subtarget->isThumb1Only())
13898 return VT.getScalarSizeInBits() <= 32;
13899 return true;
13900 }
13901 return VT.isScalarInteger();
13902}
13903
13904bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13905 EVT VT) const {
13906 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13907 return false;
13908
13909 switch (FPVT.getSimpleVT().SimpleTy) {
13910 case MVT::f16:
13911 return Subtarget->hasVFP2Base();
13912 case MVT::f32:
13913 return Subtarget->hasVFP2Base();
13914 case MVT::f64:
13915 return Subtarget->hasFP64();
13916 case MVT::v4f32:
13917 case MVT::v8f16:
13918 return Subtarget->hasMVEFloatOps();
13919 default:
13920 return false;
13921 }
13922}
13923
13924static SDValue PerformSHLSimplify(SDNode *N,
13925 TargetLowering::DAGCombinerInfo &DCI,
13926 const ARMSubtarget *ST) {
13927 // Allow the generic combiner to identify potential bswaps.
13928 if (DCI.isBeforeLegalize())
13929 return SDValue();
13930
13931 // DAG combiner will fold:
13932 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13933 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13934 // Other code patterns that can also be modified have the following form:
13935 // b + ((a << 1) | 510)
13936 // b + ((a << 1) & 510)
13937 // b + ((a << 1) ^ 510)
13938 // b + ((a << 1) + 510)
13939
13940 // Many instructions can perform the shift for free, but it requires both
13941 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13942 // instruction will be needed. So, unfold back to the original pattern if:
13943 // - c1 and c2 are small enough that they don't require mov imms.
13944 // - the user(s) of the node can perform a shl
13945
13946 // No shifted operands for 16-bit instructions.
13947 if (ST->isThumb() && ST->isThumb1Only())
13948 return SDValue();
13949
13950 // Check that all the users could perform the shl themselves.
13951 for (auto *U : N->uses()) {
13952 switch(U->getOpcode()) {
13953 default:
13954 return SDValue();
13955 case ISD::SUB:
13956 case ISD::ADD:
13957 case ISD::AND:
13958 case ISD::OR:
13959 case ISD::XOR:
13960 case ISD::SETCC:
13961 case ARMISD::CMP:
13962 // Check that the user isn't already using a constant because there
13963 // aren't any instructions that support an immediate operand and a
13964 // shifted operand.
13965 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13966 isa<ConstantSDNode>(U->getOperand(1)))
13967 return SDValue();
13968
13969 // Check that it's not already using a shift.
13970 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13971 U->getOperand(1).getOpcode() == ISD::SHL)
13972 return SDValue();
13973 break;
13974 }
13975 }
13976
13977 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13978 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13979 return SDValue();
13980
13981 if (N->getOperand(0).getOpcode() != ISD::SHL)
13982 return SDValue();
13983
13984 SDValue SHL = N->getOperand(0);
13985
13986 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13987 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13988 if (!C1ShlC2 || !C2)
13989 return SDValue();
13990
13991 APInt C2Int = C2->getAPIntValue();
13992 APInt C1Int = C1ShlC2->getAPIntValue();
13993 unsigned C2Width = C2Int.getBitWidth();
13994 if (C2Int.uge(C2Width))
13995 return SDValue();
13996 uint64_t C2Value = C2Int.getZExtValue();
13997
13998 // Check that performing a lshr will not lose any information.
13999 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
14000 if ((C1Int & Mask) != C1Int)
14001 return SDValue();
14002
14003 // Shift the first constant.
14004 C1Int.lshrInPlace(C2Int);
14005
14006 // The immediates are encoded as an 8-bit value that can be rotated.
14007 auto LargeImm = [](const APInt &Imm) {
14008 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
14009 return Imm.getBitWidth() - Zeros > 8;
14010 };
14011
14012 if (LargeImm(C1Int) || LargeImm(C2Int))
14013 return SDValue();
14014
14015 SelectionDAG &DAG = DCI.DAG;
14016 SDLoc dl(N);
14017 SDValue X = SHL.getOperand(0);
14018 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14019 DAG.getConstant(C1Int, dl, MVT::i32));
14020 // Shift left to compensate for the lshr of C1Int.
14021 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
14022
14023 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14024 SHL.dump(); N->dump());
14025 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14026 return Res;
14027}
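// Note (illustrative, editorial): e.g. if DAGCombine turned ((a | 255) << 1)
// into ((a << 1) | 510) feeding an add, the combine above restores the former
// so the user can consume the shift as a free register-shifted operand
// (e.g. add r0, r2, r1, lsl #1) instead of materialising the immediate 510.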
14028
14029
14030/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14031///
14032static SDValue PerformADDCombine(SDNode *N,
14033 TargetLowering::DAGCombinerInfo &DCI,
14034 const ARMSubtarget *Subtarget) {
14035 SDValue N0 = N->getOperand(0);
14036 SDValue N1 = N->getOperand(1);
14037
14038 // Only works one way, because it needs an immediate operand.
14039 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14040 return Result;
14041
14042 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14043 return Result;
14044
14045 // First try with the default operand order.
14046 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14047 return Result;
14048
14049 // If that didn't work, try again with the operands commuted.
14050 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14051}
14052
14053// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14054// providing -X is as cheap as X (currently, just a constant).
14055static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14056 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14057 return SDValue();
14058 SDValue CSINC = N->getOperand(1);
14059 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14060 return SDValue();
14061
14062 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14063 if (!X)
14064 return SDValue();
14065
14066 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14067 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14068 CSINC.getOperand(0)),
14069 CSINC.getOperand(1), CSINC.getOperand(2),
14070 CSINC.getOperand(3));
14071}
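// Note (illustrative, editorial): csinc roughly yields X or Y+1 depending on
// the condition, so 0 - csinc(X, Y, cc) is cc ? -X : -(Y+1); since
// -(Y+1) == ~Y this is exactly csinv(-X, Y, cc), and with X a constant the
// negation folds away.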
14072
14073/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14074///
14075static SDValue PerformSUBCombine(SDNode *N,
14076 TargetLowering::DAGCombinerInfo &DCI,
14077 const ARMSubtarget *Subtarget) {
14078 SDValue N0 = N->getOperand(0);
14079 SDValue N1 = N->getOperand(1);
14080
14081 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14082 if (N1.getNode()->hasOneUse())
14083 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14084 return Result;
14085
14086 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14087 return R;
14088
14089 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14090 return SDValue();
14091
14092 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14093 // so that we can readily pattern match more mve instructions which can use
14094 // a scalar operand.
14095 SDValue VDup = N->getOperand(1);
14096 if (VDup->getOpcode() != ARMISD::VDUP)
14097 return SDValue();
14098
14099 SDValue VMov = N->getOperand(0);
14100 if (VMov->getOpcode() == ISD::BITCAST)
14101 VMov = VMov->getOperand(0);
14102
14103 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14104 return SDValue();
14105
14106 SDLoc dl(N);
14107 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14108 DCI.DAG.getConstant(0, dl, MVT::i32),
14109 VDup->getOperand(0));
14110 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14111}
14112
14113/// PerformVMULCombine
14114/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14115/// special multiplier accumulator forwarding.
14116/// vmul d3, d0, d2
14117/// vmla d3, d1, d2
14118/// is faster than
14119/// vadd d3, d0, d1
14120/// vmul d3, d3, d2
14121// However, for (A + B) * (A + B),
14122// vadd d2, d0, d1
14123// vmul d3, d0, d2
14124// vmla d3, d1, d2
14125// is slower than
14126// vadd d2, d0, d1
14127// vmul d3, d2, d2
14128static SDValue PerformVMULCombine(SDNode *N,
14129 TargetLowering::DAGCombinerInfo &DCI,
14130 const ARMSubtarget *Subtarget) {
14131 if (!Subtarget->hasVMLxForwarding())
14132 return SDValue();
14133
14134 SelectionDAG &DAG = DCI.DAG;
14135 SDValue N0 = N->getOperand(0);
14136 SDValue N1 = N->getOperand(1);
14137 unsigned Opcode = N0.getOpcode();
14138 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14139 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14140 Opcode = N1.getOpcode();
14141 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14142 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14143 return SDValue();
14144 std::swap(N0, N1);
14145 }
14146
14147 if (N0 == N1)
14148 return SDValue();
14149
14150 EVT VT = N->getValueType(0);
14151 SDLoc DL(N);
14152 SDValue N00 = N0->getOperand(0);
14153 SDValue N01 = N0->getOperand(1);
14154 return DAG.getNode(Opcode, DL, VT,
14155 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14156 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14157}
14158
14159static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14160 const ARMSubtarget *Subtarget) {
14161 EVT VT = N->getValueType(0);
14162 if (VT != MVT::v2i64)
14163 return SDValue();
14164
14165 SDValue N0 = N->getOperand(0);
14166 SDValue N1 = N->getOperand(1);
14167
14168 auto IsSignExt = [&](SDValue Op) {
14169 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14170 return SDValue();
14171 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14172 if (VT.getScalarSizeInBits() == 32)
14173 return Op->getOperand(0);
14174 return SDValue();
14175 };
14176 auto IsZeroExt = [&](SDValue Op) {
14177 // Zero extends are a little more awkward. At the point we are matching
14178 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14179 // That might be before or after a bitcast depending on how the and is
14180 // placed. Because this has to look through bitcasts, it is currently only
14181 // supported on LE.
14182 if (!Subtarget->isLittle())
14183 return SDValue();
14184
14185 SDValue And = Op;
14186 if (And->getOpcode() == ISD::BITCAST)
14187 And = And->getOperand(0);
14188 if (And->getOpcode() != ISD::AND)
14189 return SDValue();
14190 SDValue Mask = And->getOperand(1);
14191 if (Mask->getOpcode() == ISD::BITCAST)
14192 Mask = Mask->getOperand(0);
14193
14194 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14195 Mask.getValueType() != MVT::v4i32)
14196 return SDValue();
14197 if (isAllOnesConstant(Mask->getOperand(0)) &&
14198 isNullConstant(Mask->getOperand(1)) &&
14199 isAllOnesConstant(Mask->getOperand(2)) &&
14200 isNullConstant(Mask->getOperand(3)))
14201 return And->getOperand(0);
14202 return SDValue();
14203 };
14204
14205 SDLoc dl(N);
14206 if (SDValue Op0 = IsSignExt(N0)) {
14207 if (SDValue Op1 = IsSignExt(N1)) {
14208 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14209 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14210 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14211 }
14212 }
14213 if (SDValue Op0 = IsZeroExt(N0)) {
14214 if (SDValue Op1 = IsZeroExt(N1)) {
14215 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14216 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14217 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14218 }
14219 }
14220
14221 return SDValue();
14222}
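// Note (illustrative, editorial): a v2i64 multiply whose 32-bit lanes were
// sign- or zero-extended in place can use the MVE vmull family (matched here as
// ARMISD::VMULLs/VMULLu on the v4i32 sources), avoiding the expensive generic
// expansion of 64-bit vector multiplies.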
14223
14224static SDValue PerformMULCombine(SDNode *N,
14225 TargetLowering::DAGCombinerInfo &DCI,
14226 const ARMSubtarget *Subtarget) {
14227 SelectionDAG &DAG = DCI.DAG;
14228
14229 EVT VT = N->getValueType(0);
14230 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14231 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14232
14233 if (Subtarget->isThumb1Only())
14234 return SDValue();
14235
14236 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14237 return SDValue();
14238
14239 if (VT.is64BitVector() || VT.is128BitVector())
14240 return PerformVMULCombine(N, DCI, Subtarget);
14241 if (VT != MVT::i32)
14242 return SDValue();
14243
14244 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14245 if (!C)
14246 return SDValue();
14247
14248 int64_t MulAmt = C->getSExtValue();
14249 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14250
14251 ShiftAmt = ShiftAmt & (32 - 1);
14252 SDValue V = N->getOperand(0);
14253 SDLoc DL(N);
14254
14255 SDValue Res;
14256 MulAmt >>= ShiftAmt;
14257
14258 if (MulAmt >= 0) {
14259 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14260 // (mul x, 2^N + 1) => (add (shl x, N), x)
14261 Res = DAG.getNode(ISD::ADD, DL, VT,
14262 V,
14263 DAG.getNode(ISD::SHL, DL, VT,
14264 V,
14265 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14266 MVT::i32)));
14267 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14268 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14269 Res = DAG.getNode(ISD::SUB, DL, VT,
14270 DAG.getNode(ISD::SHL, DL, VT,
14271 V,
14272 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14273 MVT::i32)),
14274 V);
14275 } else
14276 return SDValue();
14277 } else {
14278 uint64_t MulAmtAbs = -MulAmt;
14279 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14280 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14281 Res = DAG.getNode(ISD::SUB, DL, VT,
14282 V,
14283 DAG.getNode(ISD::SHL, DL, VT,
14284 V,
14285 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14286 MVT::i32)));
14287 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14288 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14289 Res = DAG.getNode(ISD::ADD, DL, VT,
14290 V,
14291 DAG.getNode(ISD::SHL, DL, VT,
14292 V,
14293 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14294 MVT::i32)));
14295 Res = DAG.getNode(ISD::SUB, DL, VT,
14296 DAG.getConstant(0, DL, MVT::i32), Res);
14297 } else
14298 return SDValue();
14299 }
14300
14301 if (ShiftAmt != 0)
14302 Res = DAG.getNode(ISD::SHL, DL, VT,
14303 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14304
14305 // Do not add new nodes to DAG combiner worklist.
14306 DCI.CombineTo(N, Res, false);
14307 return SDValue();
14308}
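// Note (illustrative, editorial): e.g. mul x, 10 becomes (add x, (shl x, 2)) << 1
// and mul x, 7 becomes (sub (shl x, 3), x); both select to a short sequence of
// add/sub/rsb with register-shifted operands instead of a multiply.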
14309
14310static SDValue CombineANDShift(SDNode *N,
14311 TargetLowering::DAGCombinerInfo &DCI,
14312 const ARMSubtarget *Subtarget) {
14313 // Allow DAGCombine to pattern-match before we touch the canonical form.
14314 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14315 return SDValue();
14316
14317 if (N->getValueType(0) != MVT::i32)
14318 return SDValue();
14319
14320 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14321 if (!N1C)
14322 return SDValue();
14323
14324 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14325 // Don't transform uxtb/uxth.
14326 if (C1 == 255 || C1 == 65535)
14327 return SDValue();
14328
14329 SDNode *N0 = N->getOperand(0).getNode();
14330 if (!N0->hasOneUse())
14331 return SDValue();
14332
14333 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14334 return SDValue();
14335
14336 bool LeftShift = N0->getOpcode() == ISD::SHL;
14337
14338 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14339 if (!N01C)
14340 return SDValue();
14341
14342 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14343 if (!C2 || C2 >= 32)
14344 return SDValue();
14345
14346 // Clear irrelevant bits in the mask.
14347 if (LeftShift)
14348 C1 &= (-1U << C2);
14349 else
14350 C1 &= (-1U >> C2);
14351
14352 SelectionDAG &DAG = DCI.DAG;
14353 SDLoc DL(N);
14354
14355 // We have a pattern of the form "(and (shl x, c2) c1)" or
14356 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14357 // transform to a pair of shifts, to save materializing c1.
14358
14359 // First pattern: right shift, then mask off leading bits.
14360 // FIXME: Use demanded bits?
14361 if (!LeftShift && isMask_32(C1)) {
14362 uint32_t C3 = llvm::countl_zero(C1);
14363 if (C2 < C3) {
14364 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14365 DAG.getConstant(C3 - C2, DL, MVT::i32));
14366 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14367 DAG.getConstant(C3, DL, MVT::i32));
14368 }
14369 }
14370
14371 // First pattern, reversed: left shift, then mask off trailing bits.
14372 if (LeftShift && isMask_32(~C1)) {
14373 uint32_t C3 = llvm::countr_zero(C1);
14374 if (C2 < C3) {
14375 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14376 DAG.getConstant(C3 - C2, DL, MVT::i32));
14377 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14378 DAG.getConstant(C3, DL, MVT::i32));
14379 }
14380 }
14381
14382 // Second pattern: left shift, then mask off leading bits.
14383 // FIXME: Use demanded bits?
14384 if (LeftShift && isShiftedMask_32(C1)) {
14385 uint32_t Trailing = llvm::countr_zero(C1);
14386 uint32_t C3 = llvm::countl_zero(C1);
14387 if (Trailing == C2 && C2 + C3 < 32) {
14388 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14389 DAG.getConstant(C2 + C3, DL, MVT::i32));
14390 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14391 DAG.getConstant(C3, DL, MVT::i32));
14392 }
14393 }
14394
14395 // Second pattern, reversed: right shift, then mask off trailing bits.
14396 // FIXME: Handle other patterns of known/demanded bits.
14397 if (!LeftShift && isShiftedMask_32(C1)) {
14398 uint32_t Leading = llvm::countl_zero(C1);
14399 uint32_t C3 = llvm::countr_zero(C1);
14400 if (Leading == C2 && C2 + C3 < 32) {
14401 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14402 DAG.getConstant(C2 + C3, DL, MVT::i32));
14403 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14404 DAG.getConstant(C3, DL, MVT::i32));
14405 }
14406 }
14407
14408 // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
14409 // if "c1 >> c2" is a cheaper immediate than "c1"
14410 if (LeftShift &&
14411 HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {
14412
14413 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14414 DAG.getConstant(C1 >> C2, DL, MVT::i32));
14415 return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
14416 DAG.getConstant(C2, DL, MVT::i32));
14417 }
14418
14419 return SDValue();
14420}
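// Note (illustrative, editorial): on Thumb1 a mask constant usually needs its
// own mov/ldr, so e.g. (and (srl x, 3), 0x1fff) is rewritten above to
// (srl (shl x, 16), 19): the pair of shifts recreates the masked field without
// materialising 0x1fff.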
14421
14422static SDValue PerformANDCombine(SDNode *N,
14423 TargetLowering::DAGCombinerInfo &DCI,
14424 const ARMSubtarget *Subtarget) {
14425 // Attempt to use immediate-form VBIC
14426 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14427 SDLoc dl(N);
14428 EVT VT = N->getValueType(0);
14429 SelectionDAG &DAG = DCI.DAG;
14430
14431 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14432 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14433 return SDValue();
14434
14435 APInt SplatBits, SplatUndef;
14436 unsigned SplatBitSize;
14437 bool HasAnyUndefs;
14438 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14439 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14440 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14441 SplatBitSize == 64) {
14442 EVT VbicVT;
14443 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14444 SplatUndef.getZExtValue(), SplatBitSize,
14445 DAG, dl, VbicVT, VT, OtherModImm);
14446 if (Val.getNode()) {
14447 SDValue Input =
14448 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14449 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14450 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14451 }
14452 }
14453 }
14454
14455 if (!Subtarget->isThumb1Only()) {
14456 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14457 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14458 return Result;
14459
14460 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14461 return Result;
14462 }
14463
14464 if (Subtarget->isThumb1Only())
14465 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14466 return Result;
14467
14468 return SDValue();
14469}
14470
14471// Try combining OR nodes to SMULWB, SMULWT.
14472static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14473 TargetLowering::DAGCombinerInfo &DCI,
14474 const ARMSubtarget *Subtarget) {
14475 if (!Subtarget->hasV6Ops() ||
14476 (Subtarget->isThumb() &&
14477 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14478 return SDValue();
14479
14480 SDValue SRL = OR->getOperand(0);
14481 SDValue SHL = OR->getOperand(1);
14482
14483 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14484 SRL = OR->getOperand(1);
14485 SHL = OR->getOperand(0);
14486 }
14487 if (!isSRL16(SRL) || !isSHL16(SHL))
14488 return SDValue();
14489
14490 // The first operands to the shifts need to be the two results from the
14491 // same smul_lohi node.
14492 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14493 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14494 return SDValue();
14495
14496 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14497 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14498 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14499 return SDValue();
14500
14501 // Now we have:
14502 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14503 // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14504 // For SMULWB the 16-bit value will be sign extended somehow.
14505 // For SMULWT only the SRA is required.
14506 // Check both sides of SMUL_LOHI
14507 SDValue OpS16 = SMULLOHI->getOperand(0);
14508 SDValue OpS32 = SMULLOHI->getOperand(1);
14509
14510 SelectionDAG &DAG = DCI.DAG;
14511 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14512 OpS16 = OpS32;
14513 OpS32 = SMULLOHI->getOperand(0);
14514 }
14515
14516 SDLoc dl(OR);
14517 unsigned Opcode = 0;
14518 if (isS16(OpS16, DAG))
14519 Opcode = ARMISD::SMULWB;
14520 else if (isSRA16(OpS16)) {
14521 Opcode = ARMISD::SMULWT;
14522 OpS16 = OpS16->getOperand(0);
14523 }
14524 else
14525 return SDValue();
14526
14527 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14528 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14529 return SDValue(OR, 0);
14530}
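// Note (illustrative, editorial): smulwb/smulwt return the top 32 bits of the
// 48-bit product of a 32-bit operand and a 16-bit halfword; the matched
// (or (srl lo, 16), (shl hi, 16)) is just bits [47:16] of the full 64-bit
// smul_lohi product, so the whole expression collapses to one SMULW[B|T] node.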
14531
14532static SDValue PerformORCombineToBFI(SDNode *N,
14533 TargetLowering::DAGCombinerInfo &DCI,
14534 const ARMSubtarget *Subtarget) {
14535 // BFI is only available on V6T2+
14536 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14537 return SDValue();
14538
14539 EVT VT = N->getValueType(0);
14540 SDValue N0 = N->getOperand(0);
14541 SDValue N1 = N->getOperand(1);
14542 SelectionDAG &DAG = DCI.DAG;
14543 SDLoc DL(N);
14544 // 1) or (and A, mask), val => ARMbfi A, val, mask
14545 // iff (val & mask) == val
14546 //
14547 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14548 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14549 // && mask == ~mask2
14550 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14551 // && ~mask == mask2
14552 // (i.e., copy a bitfield value into another bitfield of the same width)
14553
14554 if (VT != MVT::i32)
14555 return SDValue();
14556
14557 SDValue N00 = N0.getOperand(0);
14558
14559 // The value and the mask need to be constants so we can verify this is
14560 // actually a bitfield set. If the mask is 0xffff, we can do better
14561 // via a movt instruction, so don't use BFI in that case.
14562 SDValue MaskOp = N0.getOperand(1);
14563 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14564 if (!MaskC)
14565 return SDValue();
14566 unsigned Mask = MaskC->getZExtValue();
14567 if (Mask == 0xffff)
14568 return SDValue();
14569 SDValue Res;
14570 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14571 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14572 if (N1C) {
14573 unsigned Val = N1C->getZExtValue();
14574 if ((Val & ~Mask) != Val)
14575 return SDValue();
14576
14577 if (ARM::isBitFieldInvertedMask(Mask)) {
14578 Val >>= llvm::countr_zero(~Mask);
14579
14580 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14581 DAG.getConstant(Val, DL, MVT::i32),
14582 DAG.getConstant(Mask, DL, MVT::i32));
14583
14584 DCI.CombineTo(N, Res, false);
14585 // Return value from the original node to inform the combiner that N is
14586 // now dead.
14587 return SDValue(N, 0);
14588 }
14589 } else if (N1.getOpcode() == ISD::AND) {
14590 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14591 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14592 if (!N11C)
14593 return SDValue();
14594 unsigned Mask2 = N11C->getZExtValue();
14595
14596 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
14597 // to match as-is.
14598 if (ARM::isBitFieldInvertedMask(Mask) &&
14599 (Mask == ~Mask2)) {
14600 // The pack halfword instruction works better for masks that fit it,
14601 // so use that when it's available.
14602 if (Subtarget->hasDSP() &&
14603 (Mask == 0xffff || Mask == 0xffff0000))
14604 return SDValue();
14605 // 2a
14606 unsigned amt = llvm::countr_zero(Mask2);
14607 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14608 DAG.getConstant(amt, DL, MVT::i32));
14609 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14610 DAG.getConstant(Mask, DL, MVT::i32));
14611 DCI.CombineTo(N, Res, false);
14612 // Return value from the original node to inform the combiner that N is
14613 // now dead.
14614 return SDValue(N, 0);
14615 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14616 (~Mask == Mask2)) {
14617 // The pack halfword instruction works better for masks that fit it,
14618 // so use that when it's available.
14619 if (Subtarget->hasDSP() &&
14620 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14621 return SDValue();
14622 // 2b
14623 unsigned lsb = llvm::countr_zero(Mask);
14624 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14625 DAG.getConstant(lsb, DL, MVT::i32));
14626 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14627 DAG.getConstant(Mask2, DL, MVT::i32));
14628 DCI.CombineTo(N, Res, false);
14629 // Return value from the original node to inform the combiner that N is
14630 // now dead.
14631 return SDValue(N, 0);
14632 }
14633 }
14634
14635 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14636 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14637 ARM::isBitFieldInvertedMask(~Mask)) {
14638 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14639 // where lsb(mask) == #shamt and masked bits of B are known zero.
14640 SDValue ShAmt = N00.getOperand(1);
14641 unsigned ShAmtC = ShAmt->getAsZExtVal();
14642 unsigned LSB = llvm::countr_zero(Mask);
14643 if (ShAmtC != LSB)
14644 return SDValue();
14645
14646 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14647 DAG.getConstant(~Mask, DL, MVT::i32));
14648
14649 DCI.CombineTo(N, Res, false);
14650 // Return value from the original node to inform the combiner that N is
14651 // now dead.
14652 return SDValue(N, 0);
14653 }
14654
14655 return SDValue();
14656}
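// Note (illustrative, editorial): as a concrete instance of case (1) above,
// (or (and A, 0xffff00ff), 0x5500) satisfies (val & ~mask) == val, so it
// becomes ARMISD::BFI A, 0x55, 0xffff00ff, i.e. a single bitfield insert of the
// constant into bits [15:8] of A.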
14657
14658static bool isValidMVECond(unsigned CC, bool IsFloat) {
14659 switch (CC) {
14660 case ARMCC::EQ:
14661 case ARMCC::NE:
14662 case ARMCC::LE:
14663 case ARMCC::GT:
14664 case ARMCC::GE:
14665 case ARMCC::LT:
14666 return true;
14667 case ARMCC::HS:
14668 case ARMCC::HI:
14669 return !IsFloat;
14670 default:
14671 return false;
14672 };
14673}
14674
14675static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14676 if (N->getOpcode() == ARMISD::VCMP)
14677 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14678 else if (N->getOpcode() == ARMISD::VCMPZ)
14679 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14680 else
14681 llvm_unreachable("Not a VCMP/VCMPZ!");
14682}
14683
14684static bool CanInvertMVEVCMP(SDValue N) {
14685 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14686 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14687}
14688
14689static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14690 const ARMSubtarget *Subtarget) {
14691 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14692 // together with predicates
14693 EVT VT = N->getValueType(0);
14694 SDLoc DL(N);
14695 SDValue N0 = N->getOperand(0);
14696 SDValue N1 = N->getOperand(1);
14697
14698 auto IsFreelyInvertable = [&](SDValue V) {
14699 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14700 return CanInvertMVEVCMP(V);
14701 return false;
14702 };
14703
14704 // At least one operand must be freely invertible.
14705 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14706 return SDValue();
14707
14708 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14709 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14710 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14711 return DAG.getLogicalNOT(DL, And, VT);
14712}
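// Illustrative standalone sketch (not part of this file; compile separately):
// the i1-vector rewrite above is De Morgan applied lane-wise. Modelling a
// v16i1 predicate as a 16-bit lane mask, "or a, b" and "not (and (not a),
// (not b))" agree, which is why the rewrite is profitable whenever both VCMP
// inputs can be inverted for free.
#include <cassert>
#include <cstdint>

int main() {
  uint16_t A = 0b1010101010101010, B = 0b0000111100001111;
  uint16_t Or = A | B;
  uint16_t Rewritten = ~(~A & ~B); // and of the inverted predicates, then a final NOT
  assert(Or == Rewritten);
}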
14713
14714/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14717 const ARMSubtarget *Subtarget) {
14718 // Attempt to use immediate-form VORR
14719 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14720 SDLoc dl(N);
14721 EVT VT = N->getValueType(0);
14722 SelectionDAG &DAG = DCI.DAG;
14723
14724 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14725 return SDValue();
14726
14727 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14728 VT == MVT::v8i1 || VT == MVT::v16i1))
14729 return PerformORCombine_i1(N, DAG, Subtarget);
14730
14731 APInt SplatBits, SplatUndef;
14732 unsigned SplatBitSize;
14733 bool HasAnyUndefs;
14734 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14735 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14736 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14737 SplatBitSize == 64) {
14738 EVT VorrVT;
14739 SDValue Val =
14740 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14741 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14742 if (Val.getNode()) {
14743 SDValue Input =
14744 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14745 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14746 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14747 }
14748 }
14749 }
14750
14751 if (!Subtarget->isThumb1Only()) {
14752 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14753 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14754 return Result;
14755 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14756 return Result;
14757 }
14758
14759 SDValue N0 = N->getOperand(0);
14760 SDValue N1 = N->getOperand(1);
14761
14762 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
14763 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14765
14766 // The code below optimizes (or (and X, Y), Z).
14767 // The AND operand needs to have a single user to make these optimizations
14768 // profitable.
14769 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14770 return SDValue();
14771
14772 APInt SplatUndef;
14773 unsigned SplatBitSize;
14774 bool HasAnyUndefs;
14775
14776 APInt SplatBits0, SplatBits1;
14777 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14778 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14779 // Ensure that the second operand of each AND is a constant splat
14780 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14781 HasAnyUndefs) && !HasAnyUndefs) {
14782 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14783 HasAnyUndefs) && !HasAnyUndefs) {
14784 // Ensure that the bit widths of the constants are the same and that
14785 // the splat arguments are logical inverses as per the pattern we
14786 // are trying to simplify.
14787 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14788 SplatBits0 == ~SplatBits1) {
14789 // Canonicalize the vector type to make instruction selection
14790 // simpler.
14791 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14792 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14793 N0->getOperand(1),
14794 N0->getOperand(0),
14795 N1->getOperand(0));
14796 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14797 }
14798 }
14799 }
14800 }
14801
14802 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14803 // reasonable.
14804 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14805 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14806 return Res;
14807 }
14808
14809 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14810 return Result;
14811
14812 return SDValue();
14813}
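// Illustrative standalone sketch (not part of this file; compile separately):
// the "(or (and B, A), (and C, ~A)) => VBSP A, B, C" pattern above is a
// bitwise select on the constant mask A. bitwiseSelect is a hypothetical
// scalar model of one lane; the xor-merge identity on the right-hand side of
// the assert is the usual branch-free formulation of the same select.
#include <cassert>
#include <cstdint>

static uint32_t bitwiseSelect(uint32_t A, uint32_t B, uint32_t C) {
  return (B & A) | (C & ~A); // B where A is 1, C where A is 0
}

int main() {
  uint32_t A = 0x0ff00ff0, B = 0xdeadbeef, C = 0x01234567;
  assert(bitwiseSelect(A, B, C) == (C ^ ((C ^ B) & A)));
}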
14814
14817 const ARMSubtarget *Subtarget) {
14818 EVT VT = N->getValueType(0);
14819 SelectionDAG &DAG = DCI.DAG;
14820
14821 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14822 return SDValue();
14823
14824 if (!Subtarget->isThumb1Only()) {
14825 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14826 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14827 return Result;
14828
14829 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14830 return Result;
14831 }
14832
14833 if (Subtarget->hasMVEIntegerOps()) {
14834 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14835 SDValue N0 = N->getOperand(0);
14836 SDValue N1 = N->getOperand(1);
14837 const TargetLowering *TLI = Subtarget->getTargetLowering();
14838 if (TLI->isConstTrueVal(N1) &&
14839 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14840 if (CanInvertMVEVCMP(N0)) {
14841 SDLoc DL(N0);
14843
14845 Ops.push_back(N0->getOperand(0));
14846 if (N0->getOpcode() == ARMISD::VCMP)
14847 Ops.push_back(N0->getOperand(1));
14848 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14849 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14850 }
14851 }
14852 }
14853
14854 return SDValue();
14855}
14856
14857 // ParseBFI - Given a BFI instruction in N, extract the "from" value (Rn)
14858 // and return it, and fill in FromMask and ToMask with the (consecutive) bits
14859 // in "from" to be extracted and their position in "to" (Rd).
14860static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14861 assert(N->getOpcode() == ARMISD::BFI);
14862
14863 SDValue From = N->getOperand(1);
14864 ToMask = ~N->getConstantOperandAPInt(2);
14865 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14866
14867 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14868 // #C in the base of the SHR.
14869 if (From->getOpcode() == ISD::SRL &&
14870 isa<ConstantSDNode>(From->getOperand(1))) {
14871 APInt Shift = From->getConstantOperandAPInt(1);
14872 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14873 FromMask <<= Shift.getLimitedValue(31);
14874 From = From->getOperand(0);
14875 }
14876
14877 return From;
14878}
14879
14880 // If A and B each contain one contiguous set of bits, is A | B equal to
14881 // their concatenation (A . B)?
14882 // Neither A nor B may be zero.
14883static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14884 unsigned LastActiveBitInA = A.countr_zero();
14885 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14886 return LastActiveBitInA - 1 == FirstActiveBitInB;
14887}
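// Illustrative standalone sketch (not part of this file; compile separately):
// the adjacency test above on plain 32-bit masks. With A and B each a single
// contiguous run of bits, they "properly concatenate" when A's lowest set bit
// sits directly above B's highest set bit, so A | B is again one contiguous
// run. properlyConcatenate is a hypothetical scalar twin of the APInt version.
#include <bit>
#include <cassert>
#include <cstdint>

static bool properlyConcatenate(uint32_t A, uint32_t B) {
  unsigned LowestActiveBitInA = std::countr_zero(A);
  unsigned HighestActiveBitInB = 32u - std::countl_zero(B) - 1;
  return LowestActiveBitInA - 1 == HighestActiveBitInB;
}

int main() {
  assert(properlyConcatenate(0x00000f00u, 0x000000ffu));  // bits 8-11 directly above bits 0-7
  assert(!properlyConcatenate(0x0000f000u, 0x000000ffu)); // gap at bits 8-11
}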
14888
14890 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14891 APInt ToMask, FromMask;
14892 SDValue From = ParseBFI(N, ToMask, FromMask);
14893 SDValue To = N->getOperand(0);
14894
14895 SDValue V = To;
14896 if (V.getOpcode() != ARMISD::BFI)
14897 return SDValue();
14898
14899 APInt NewToMask, NewFromMask;
14900 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14901 if (NewFrom != From)
14902 return SDValue();
14903
14904 // Do the written bits conflict with any we've seen so far?
14905 if ((NewToMask & ToMask).getBoolValue())
14906 // Conflicting bits.
14907 return SDValue();
14908
14909 // Are the new bits contiguous when combined with the old bits?
14910 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14911 BitsProperlyConcatenate(FromMask, NewFromMask))
14912 return V;
14913 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14914 BitsProperlyConcatenate(NewFromMask, FromMask))
14915 return V;
14916
14917 return SDValue();
14918}
14919
14921 SDValue N0 = N->getOperand(0);
14922 SDValue N1 = N->getOperand(1);
14923
14924 if (N1.getOpcode() == ISD::AND) {
14925 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14926 // the bits being cleared by the AND are not demanded by the BFI.
14927 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14928 if (!N11C)
14929 return SDValue();
14930 unsigned InvMask = N->getConstantOperandVal(2);
14931 unsigned LSB = llvm::countr_zero(~InvMask);
14932 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14933 assert(Width <
14934 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14935 "undefined behavior");
14936 unsigned Mask = (1u << Width) - 1;
14937 unsigned Mask2 = N11C->getZExtValue();
14938 if ((Mask & (~Mask2)) == 0)
14939 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14940 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14941 return SDValue();
14942 }
14943
14944 // Look for another BFI to combine with.
14945 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14946 // We've found a BFI.
14947 APInt ToMask1, FromMask1;
14948 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14949
14950 APInt ToMask2, FromMask2;
14951 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14952 assert(From1 == From2);
14953 (void)From2;
14954
14955 // Create a new BFI, combining the two together.
14956 APInt NewFromMask = FromMask1 | FromMask2;
14957 APInt NewToMask = ToMask1 | ToMask2;
14958
14959 EVT VT = N->getValueType(0);
14960 SDLoc dl(N);
14961
14962 if (NewFromMask[0] == 0)
14963 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
14964 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
14965 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14966 DAG.getConstant(~NewToMask, dl, VT));
14967 }
14968
14969 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14970 // that lower bit insertions are performed first, provided that M1 and M2
14971 // do not overlap. This can allow multiple BFI instructions to be combined
14972 // together by the other folds above.
14973 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14974 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14975 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14976
14977 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14978 ToMask1.countl_zero() < ToMask2.countl_zero())
14979 return SDValue();
14980
14981 EVT VT = N->getValueType(0);
14982 SDLoc dl(N);
14983 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14984 N->getOperand(1), N->getOperand(2));
14985 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14986 N0.getOperand(2));
14987 }
14988
14989 return SDValue();
14990}
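// Illustrative standalone sketch (not part of this file; compile separately):
// the "(bfi A, (and B, Mask2), InvMask)" fold above. The BFI only consumes the
// low Width bits of its insert operand, so an AND whose mask keeps all of
// those bits cannot change the result and can be dropped. bfi_model is the
// same hypothetical helper as in the earlier sketch.
#include <bit>
#include <cassert>
#include <cstdint>

static uint32_t bfi_model(uint32_t Base, uint32_t Ins, uint32_t InvMask) {
  unsigned Lsb = std::countr_zero(~InvMask);
  return (Base & InvMask) | ((Ins << Lsb) & ~InvMask);
}

int main() {
  uint32_t InvMask = ~0x0000ff00u;                 // written field = bits 8-15
  unsigned Lsb = std::countr_zero(~InvMask);       // 8
  unsigned Width = std::bit_width(~InvMask) - Lsb; // 8
  uint32_t Demanded = (1u << Width) - 1;           // low bits the BFI actually reads
  uint32_t Mask2 = 0x000000ff;                     // the AND on the insert operand
  assert((Demanded & ~Mask2) == 0);                // AND keeps every demanded bit...
  uint32_t A = 0x11223344, B = 0xdeadbeef;
  assert(bfi_model(A, B & Mask2, InvMask) ==       // ...so it is a no-op for the BFI
         bfi_model(A, B, InvMask));
}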
14991
14992 // Check that N is CMPZ(CSINC(0, 0, CC, X))
14993 // or CMPZ(CMOV(1, 0, CC, $cpsr, X)),
14994 // and return X if so.
14996 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14997 return SDValue();
14998 SDValue CSInc = Cmp->getOperand(0);
14999
15000 // Ignore any `And 1` nodes that may not yet have been removed. We are
15001 // looking for a value that produces 1/0, so these have no effect on the
15002 // code.
15003 while (CSInc.getOpcode() == ISD::AND &&
15004 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
15005 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
15006 CSInc = CSInc.getOperand(0);
15007
15008 if (CSInc.getOpcode() == ARMISD::CSINC &&
15009 isNullConstant(CSInc.getOperand(0)) &&
15010 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15012 return CSInc.getOperand(3);
15013 }
15014 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
15015 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15017 return CSInc.getOperand(4);
15018 }
15019 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
15020 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15023 return CSInc.getOperand(4);
15024 }
15025 return SDValue();
15026}
15027
15029 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
15030 // t92: glue = ARMISD::CMPZ t74, 0
15031 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15032 // t96: glue = ARMISD::CMPZ t93, 0
15033 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15035 if (SDValue C = IsCMPZCSINC(N, Cond))
15036 if (Cond == ARMCC::EQ)
15037 return C;
15038 return SDValue();
15039}
15040
15042 // Fold away an unnecessary CMPZ/CSINC
15043 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15044 // if C1==EQ -> CSXYZ A, B, C2, D
15045 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15047 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15048 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15049 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15050 N->getOperand(1),
15051 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15052 if (N->getConstantOperandVal(2) == ARMCC::NE)
15053 return DAG.getNode(
15054 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15055 N->getOperand(1),
15057 }
15058 return SDValue();
15059}
15060
15061/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15062/// ARMISD::VMOVRRD.
15065 const ARMSubtarget *Subtarget) {
15066 // vmovrrd(vmovdrr x, y) -> x,y
15067 SDValue InDouble = N->getOperand(0);
15068 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15069 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15070
15071 // vmovrrd(load f64) -> (load i32), (load i32)
15072 SDNode *InNode = InDouble.getNode();
15073 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15074 InNode->getValueType(0) == MVT::f64 &&
15075 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15076 !cast<LoadSDNode>(InNode)->isVolatile()) {
15077 // TODO: Should this be done for non-FrameIndex operands?
15078 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15079
15080 SelectionDAG &DAG = DCI.DAG;
15081 SDLoc DL(LD);
15082 SDValue BasePtr = LD->getBasePtr();
15083 SDValue NewLD1 =
15084 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15085 LD->getAlign(), LD->getMemOperand()->getFlags());
15086
15087 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15088 DAG.getConstant(4, DL, MVT::i32));
15089
15090 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15091 LD->getPointerInfo().getWithOffset(4),
15092 commonAlignment(LD->getAlign(), 4),
15093 LD->getMemOperand()->getFlags());
15094
15095 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15096 if (DCI.DAG.getDataLayout().isBigEndian())
15097 std::swap (NewLD1, NewLD2);
15098 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15099 return Result;
15100 }
15101
15102 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15103 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15104 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15105 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15106 SDValue BV = InDouble.getOperand(0);
15107 // Look up through any nop bitcasts and vector_reg_casts. Bitcasts may
15108 // change the lane order under big endian.
15109 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15110 while (
15111 (BV.getOpcode() == ISD::BITCAST ||
15113 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15114 BVSwap = BV.getOpcode() == ISD::BITCAST;
15115 BV = BV.getOperand(0);
15116 }
15117 if (BV.getValueType() != MVT::v4i32)
15118 return SDValue();
15119
15120 // Handle buildvectors, pulling out the correct lane depending on
15121 // endianness.
15122 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15123 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15124 SDValue Op0 = BV.getOperand(Offset);
15125 SDValue Op1 = BV.getOperand(Offset + 1);
15126 if (!Subtarget->isLittle() && BVSwap)
15127 std::swap(Op0, Op1);
15128
15129 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15130 }
15131
15132 // A chain of insert_vectors, grabbing the correct value of the chain of
15133 // inserts.
15134 SDValue Op0, Op1;
15135 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15136 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15137 if (BV.getConstantOperandVal(2) == Offset)
15138 Op0 = BV.getOperand(1);
15139 if (BV.getConstantOperandVal(2) == Offset + 1)
15140 Op1 = BV.getOperand(1);
15141 }
15142 BV = BV.getOperand(0);
15143 }
15144 if (!Subtarget->isLittle() && BVSwap)
15145 std::swap(Op0, Op1);
15146 if (Op0 && Op1)
15147 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15148 }
15149
15150 return SDValue();
15151}
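// Illustrative standalone sketch (not part of this file; compile separately):
// the "vmovrrd(load f64) -> (load i32), (load i32)" split above, modelled with
// plain memory accesses. The two halves are read at byte offsets 0 and +4, and
// which one ends up as the low register depends on endianness, which is what
// the std::swap under big endian accounts for.
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

int main() {
  double D = 3.141592653589793;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));

  unsigned char Mem[8];
  std::memcpy(Mem, &D, sizeof(Mem));
  uint32_t Lo, Hi;
  std::memcpy(&Lo, Mem + 0, 4); // first i32 load, base pointer
  std::memcpy(&Hi, Mem + 4, 4); // second i32 load, base pointer + 4
  if (std::endian::native == std::endian::big)
    std::swap(Lo, Hi);          // mirrors the swap of NewLD1/NewLD2 above

  assert(Lo == static_cast<uint32_t>(Bits));
  assert(Hi == static_cast<uint32_t>(Bits >> 32));
}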
15152
15153/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15154/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15156 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15157 SDValue Op0 = N->getOperand(0);
15158 SDValue Op1 = N->getOperand(1);
15159 if (Op0.getOpcode() == ISD::BITCAST)
15160 Op0 = Op0.getOperand(0);
15161 if (Op1.getOpcode() == ISD::BITCAST)
15162 Op1 = Op1.getOperand(0);
15163 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15164 Op0.getNode() == Op1.getNode() &&
15165 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15166 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15167 N->getValueType(0), Op0.getOperand(0));
15168 return SDValue();
15169}
15170
15173 SDValue Op0 = N->getOperand(0);
15174
15175 // VMOVhr (VMOVrh (X)) -> X
15176 if (Op0->getOpcode() == ARMISD::VMOVrh)
15177 return Op0->getOperand(0);
15178
15179 // FullFP16: half values are passed in S-registers, and we don't
15180 // need any of the bitcasts and moves:
15181 //
15182 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15183 // t5: i32 = bitcast t2
15184 // t18: f16 = ARMISD::VMOVhr t5
15185 // =>
15186 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15187 if (Op0->getOpcode() == ISD::BITCAST) {
15188 SDValue Copy = Op0->getOperand(0);
15189 if (Copy.getValueType() == MVT::f32 &&
15190 Copy->getOpcode() == ISD::CopyFromReg) {
15191 bool HasGlue = Copy->getNumOperands() == 3;
15192 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15193 HasGlue ? Copy->getOperand(2) : SDValue()};
15194 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15195 SDValue NewCopy =
15197 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15198 ArrayRef(Ops, HasGlue ? 3 : 2));
15199
15200 // Update Users, Chains, and Potential Glue.
15201 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15202 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15203 if (HasGlue)
15204 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15205 NewCopy.getValue(2));
15206
15207 return NewCopy;
15208 }
15209 }
15210
15211 // fold (VMOVhr (load x)) -> (load (f16*)x)
15212 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15213 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15214 LN0->getMemoryVT() == MVT::i16) {
15215 SDValue Load =
15216 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15217 LN0->getBasePtr(), LN0->getMemOperand());
15218 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15219 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15220 return Load;
15221 }
15222 }
15223
15224 // Only the bottom 16 bits of the source register are used.
15225 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15226 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15227 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15228 return SDValue(N, 0);
15229
15230 return SDValue();
15231}
15232
15234 SDValue N0 = N->getOperand(0);
15235 EVT VT = N->getValueType(0);
15236
15237 // fold (VMOVrh (fpconst x)) -> const x
15238 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15239 APFloat V = C->getValueAPF();
15240 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15241 }
15242
15243 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15244 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15245 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15246
15247 SDValue Load =
15248 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15249 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15250 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15251 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15252 return Load;
15253 }
15254
15255 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15256 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15257 isa<ConstantSDNode>(N0->getOperand(1)))
15258 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15259 N0->getOperand(1));
15260
15261 return SDValue();
15262}
15263
15264/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15265/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15266/// i64 vector to have f64 elements, since the value can then be loaded
15267/// directly into a VFP register.
15269 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15270 for (unsigned i = 0; i < NumElts; ++i) {
15271 SDNode *Elt = N->getOperand(i).getNode();
15272 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15273 return true;
15274 }
15275 return false;
15276}
15277
15278/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15279/// ISD::BUILD_VECTOR.
15282 const ARMSubtarget *Subtarget) {
15283 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15284 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15285 // into a pair of GPRs, which is fine when the value is used as a scalar,
15286 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15287 SelectionDAG &DAG = DCI.DAG;
15288 if (N->getNumOperands() == 2)
15289 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15290 return RV;
15291
15292 // Load i64 elements as f64 values so that type legalization does not split
15293 // them up into i32 values.
15294 EVT VT = N->getValueType(0);
15295 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15296 return SDValue();
15297 SDLoc dl(N);
15299 unsigned NumElts = VT.getVectorNumElements();
15300 for (unsigned i = 0; i < NumElts; ++i) {
15301 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15302 Ops.push_back(V);
15303 // Make the DAGCombiner fold the bitcast.
15304 DCI.AddToWorklist(V.getNode());
15305 }
15306 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15307 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15308 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15309}
15310
15311/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15312static SDValue
15314 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15315 // At that time, we may have inserted bitcasts from integer to float.
15316 // If these bitcasts have survived DAGCombine, change the lowering of this
15317 // BUILD_VECTOR into something more vector friendly, i.e., something that
15318 // does not force the use of floating point types.
15319
15320 // Make sure we can change the type of the vector.
15321 // This is possible iff:
15322 // 1. The vector is only used in a bitcast to an integer type. I.e.,
15323 // 1.1. Vector is used only once.
15324 // 1.2. Use is a bit convert to an integer type.
15325 // 2. The size of its operands is 32 bits (64-bit elements are not legal).
15326 EVT VT = N->getValueType(0);
15327 EVT EltVT = VT.getVectorElementType();
15328
15329 // Check 1.1. and 2.
15330 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15331 return SDValue();
15332
15333 // By construction, the input type must be float.
15334 assert(EltVT == MVT::f32 && "Unexpected type!");
15335
15336 // Check 1.2.
15337 SDNode *Use = *N->use_begin();
15338 if (Use->getOpcode() != ISD::BITCAST ||
15339 Use->getValueType(0).isFloatingPoint())
15340 return SDValue();
15341
15342 // Check profitability.
15343 // The model is: if more than half of the relevant operands are bitcast from
15344 // i32, turn the build_vector into a sequence of insert_vector_elt.
15345 // Relevant operands are everything that is not statically
15346 // (i.e., at compile time) bitcast.
15347 unsigned NumOfBitCastedElts = 0;
15348 unsigned NumElts = VT.getVectorNumElements();
15349 unsigned NumOfRelevantElts = NumElts;
15350 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15351 SDValue Elt = N->getOperand(Idx);
15352 if (Elt->getOpcode() == ISD::BITCAST) {
15353 // Assume only bit cast to i32 will go away.
15354 if (Elt->getOperand(0).getValueType() == MVT::i32)
15355 ++NumOfBitCastedElts;
15356 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15357 // Constants are statically cast, thus do not count them as
15358 // relevant operands.
15359 --NumOfRelevantElts;
15360 }
15361
15362 // Check if more than half of the elements require a non-free bitcast.
15363 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15364 return SDValue();
15365
15366 SelectionDAG &DAG = DCI.DAG;
15367 // Create the new vector type.
15368 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15369 // Check if the type is legal.
15370 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15371 if (!TLI.isTypeLegal(VecVT))
15372 return SDValue();
15373
15374 // Combine:
15375 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15376 // => BITCAST INSERT_VECTOR_ELT
15377 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15378 // (BITCAST EN), N.
15379 SDValue Vec = DAG.getUNDEF(VecVT);
15380 SDLoc dl(N);
15381 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15382 SDValue V = N->getOperand(Idx);
15383 if (V.isUndef())
15384 continue;
15385 if (V.getOpcode() == ISD::BITCAST &&
15386 V->getOperand(0).getValueType() == MVT::i32)
15387 // Fold obvious case.
15388 V = V.getOperand(0);
15389 else {
15390 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15391 // Make the DAGCombiner fold the bitcasts.
15392 DCI.AddToWorklist(V.getNode());
15393 }
15394 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15395 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15396 }
15397 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15398 // Make the DAGCombiner fold the bitcasts.
15399 DCI.AddToWorklist(Vec.getNode());
15400 return Vec;
15401}
15402
15403static SDValue
15405 EVT VT = N->getValueType(0);
15406 SDValue Op = N->getOperand(0);
15407 SDLoc dl(N);
15408
15409 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15410 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15411 // If the valuetypes are the same, we can remove the cast entirely.
15412 if (Op->getOperand(0).getValueType() == VT)
15413 return Op->getOperand(0);
15414 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15415 }
15416
15417 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15418 // more VPNOTs, which might get folded into else predicates.
15419 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15420 SDValue X =
15421 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15423 DCI.DAG.getConstant(65535, dl, MVT::i32));
15424 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15425 }
15426
15427 // Only the bottom 16 bits of the source register are used.
15428 if (Op.getValueType() == MVT::i32) {
15429 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15430 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15431 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15432 return SDValue(N, 0);
15433 }
15434 return SDValue();
15435}
15436
15438 const ARMSubtarget *ST) {
15439 EVT VT = N->getValueType(0);
15440 SDValue Op = N->getOperand(0);
15441 SDLoc dl(N);
15442
15443 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15444 if (ST->isLittle())
15445 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15446
15447 // VECTOR_REG_CAST undef -> undef
15448 if (Op.isUndef())
15449 return DAG.getUNDEF(VT);
15450
15451 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15452 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15453 // If the valuetypes are the same, we can remove the cast entirely.
15454 if (Op->getOperand(0).getValueType() == VT)
15455 return Op->getOperand(0);
15456 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15457 }
15458
15459 return SDValue();
15460}
15461
15463 const ARMSubtarget *Subtarget) {
15464 if (!Subtarget->hasMVEIntegerOps())
15465 return SDValue();
15466
15467 EVT VT = N->getValueType(0);
15468 SDValue Op0 = N->getOperand(0);
15469 SDValue Op1 = N->getOperand(1);
15470 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15471 SDLoc dl(N);
15472
15473 // vcmp X, 0, cc -> vcmpz X, cc
15474 if (isZeroVector(Op1))
15475 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15476
15477 unsigned SwappedCond = getSwappedCondition(Cond);
15478 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15479 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15480 if (isZeroVector(Op0))
15481 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15482 DAG.getConstant(SwappedCond, dl, MVT::i32));
15483 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15484 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15485 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15486 DAG.getConstant(SwappedCond, dl, MVT::i32));
15487 }
15488
15489 return SDValue();
15490}
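// Illustrative standalone sketch (not part of this file; compile separately):
// commuting a compare's operands, as the vcmp rewrites above do, requires the
// swapped condition (getSwappedCondition), not the inverted one. A tiny scalar
// reminder of the distinction:
#include <cassert>

int main() {
  int A = 3, B = 7;
  assert((A < B) == (B > A));  // swapped condition: what the combine uses
  assert((A < B) != (A >= B)); // inverted condition: a different rewrite entirely
}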
15491
15492/// PerformInsertEltCombine - Target-specific dag combine xforms for
15493/// ISD::INSERT_VECTOR_ELT.
15496 // Bitcast an i64 load inserted into a vector to f64.
15497 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15498 EVT VT = N->getValueType(0);
15499 SDNode *Elt = N->getOperand(1).getNode();
15500 if (VT.getVectorElementType() != MVT::i64 ||
15501 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15502 return SDValue();
15503
15504 SelectionDAG &DAG = DCI.DAG;
15505 SDLoc dl(N);
15506 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15508 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15509 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15510 // Make the DAGCombiner fold the bitcasts.
15511 DCI.AddToWorklist(Vec.getNode());
15512 DCI.AddToWorklist(V.getNode());
15513 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15514 Vec, V, N->getOperand(2));
15515 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15516}
15517
15518// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15519// directly or bitcast to an integer if the original is a float vector.
15520// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15521// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15522static SDValue
15524 EVT VT = N->getValueType(0);
15525 SDLoc dl(N);
15526
15527 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15528 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15529 return SDValue();
15530
15531 SDValue Ext = SDValue(N, 0);
15532 if (Ext.getOpcode() == ISD::BITCAST &&
15533 Ext.getOperand(0).getValueType() == MVT::f32)
15534 Ext = Ext.getOperand(0);
15535 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15536 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15537 Ext.getConstantOperandVal(1) % 2 != 0)
15538 return SDValue();
15539 if (Ext->use_size() == 1 &&
15540 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15541 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15542 return SDValue();
15543
15544 SDValue Op0 = Ext.getOperand(0);
15545 EVT VecVT = Op0.getValueType();
15546 unsigned ResNo = Op0.getResNo();
15547 unsigned Lane = Ext.getConstantOperandVal(1);
15548 if (VecVT.getVectorNumElements() != 4)
15549 return SDValue();
15550
15551 // Find another extract, of Lane + 1
15552 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15553 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15554 isa<ConstantSDNode>(V->getOperand(1)) &&
15555 V->getConstantOperandVal(1) == Lane + 1 &&
15556 V->getOperand(0).getResNo() == ResNo;
15557 });
15558 if (OtherIt == Op0->uses().end())
15559 return SDValue();
15560
15561 // For float extracts, we need to be converting to an i32 for both vector
15562 // lanes.
15563 SDValue OtherExt(*OtherIt, 0);
15564 if (OtherExt.getValueType() != MVT::i32) {
15565 if (OtherExt->use_size() != 1 ||
15566 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15567 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15568 return SDValue();
15569 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15570 }
15571
15572 // Convert the type to a f64 and extract with a VMOVRRD.
15573 SDValue F64 = DCI.DAG.getNode(
15574 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15575 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15576 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15577 SDValue VMOVRRD =
15578 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15579
15580 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15581 return VMOVRRD;
15582}
15583
15586 const ARMSubtarget *ST) {
15587 SDValue Op0 = N->getOperand(0);
15588 EVT VT = N->getValueType(0);
15589 SDLoc dl(N);
15590
15591 // extract (vdup x) -> x
15592 if (Op0->getOpcode() == ARMISD::VDUP) {
15593 SDValue X = Op0->getOperand(0);
15594 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15595 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15596 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15597 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15598 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15599 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15600
15601 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15602 X = X->getOperand(0);
15603 if (X.getValueType() == VT)
15604 return X;
15605 }
15606
15607 // extract ARM_BUILD_VECTOR -> x
15608 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15609 isa<ConstantSDNode>(N->getOperand(1)) &&
15610 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15611 return Op0.getOperand(N->getConstantOperandVal(1));
15612 }
15613
15614 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15615 if (Op0.getValueType() == MVT::v4i32 &&
15616 isa<ConstantSDNode>(N->getOperand(1)) &&
15617 Op0.getOpcode() == ISD::BITCAST &&
15619 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15620 SDValue BV = Op0.getOperand(0);
15621 unsigned Offset = N->getConstantOperandVal(1);
15622 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15623 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15624 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15625 }
15626
15627 // extract x, n; extract x, n+1 -> VMOVRRD x
15628 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15629 return R;
15630
15631 // extract (MVETrunc(x)) -> extract x
15632 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15633 unsigned Idx = N->getConstantOperandVal(1);
15634 unsigned Vec =
15636 unsigned SubIdx =
15638 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15639 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15640 }
15641
15642 return SDValue();
15643}
15644
15646 SDValue Op = N->getOperand(0);
15647 EVT VT = N->getValueType(0);
15648
15649 // sext_inreg(VGETLANEu) -> VGETLANEs
15650 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15651 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15652 Op.getOperand(0).getValueType().getScalarType())
15653 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15654 Op.getOperand(1));
15655
15656 return SDValue();
15657}
15658
15659static SDValue
15661 SDValue Vec = N->getOperand(0);
15662 SDValue SubVec = N->getOperand(1);
15663 uint64_t IdxVal = N->getConstantOperandVal(2);
15664 EVT VecVT = Vec.getValueType();
15665 EVT SubVT = SubVec.getValueType();
15666
15667 // Only do this for legal fixed vector types.
15668 if (!VecVT.isFixedLengthVector() ||
15669 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15671 return SDValue();
15672
15673 // Ignore widening patterns.
15674 if (IdxVal == 0 && Vec.isUndef())
15675 return SDValue();
15676
15677 // Subvector must be half the width and an "aligned" insertion.
15678 unsigned NumSubElts = SubVT.getVectorNumElements();
15679 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15680 (IdxVal != 0 && IdxVal != NumSubElts))
15681 return SDValue();
15682
15683 // Fold insert_subvector -> concat_vectors
15684 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15685 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15686 SDLoc DL(N);
15687 SDValue Lo, Hi;
15688 if (IdxVal == 0) {
15689 Lo = SubVec;
15690 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15691 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15692 } else {
15693 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15694 DCI.DAG.getVectorIdxConstant(0, DL));
15695 Hi = SubVec;
15696 }
15697 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15698}
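// Illustrative standalone sketch (not part of this file; compile separately):
// the insert_subvector -> concat_vectors fold above for a half-width
// subvector, modelled on arrays of lane values. Inserting at index 0 yields
// concat(Sub, hi-half(Vec)); inserting at index NumSubElts yields
// concat(lo-half(Vec), Sub).
#include <array>
#include <cassert>

int main() {
  std::array<int, 8> Vec = {0, 1, 2, 3, 4, 5, 6, 7};
  std::array<int, 4> Sub = {40, 41, 42, 43};

  std::array<int, 8> InsertAtLo = Vec;
  for (unsigned I = 0; I < 4; ++I)
    InsertAtLo[I] = Sub[I];
  assert((InsertAtLo == std::array<int, 8>{40, 41, 42, 43, 4, 5, 6, 7}));

  std::array<int, 8> InsertAtHi = Vec;
  for (unsigned I = 0; I < 4; ++I)
    InsertAtHi[4 + I] = Sub[I];
  assert((InsertAtHi == std::array<int, 8>{0, 1, 2, 3, 40, 41, 42, 43}));
}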
15699
15700// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15702 SelectionDAG &DAG) {
15703 SDValue Trunc = N->getOperand(0);
15704 EVT VT = Trunc.getValueType();
15705 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15706 return SDValue();
15707
15708 SDLoc DL(Trunc);
15709 if (isVMOVNTruncMask(N->getMask(), VT, false))
15710 return DAG.getNode(
15711 ARMISD::VMOVN, DL, VT,
15712 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15713 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15714 DAG.getConstant(1, DL, MVT::i32));
15715 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15716 return DAG.getNode(
15717 ARMISD::VMOVN, DL, VT,
15718 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15719 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15720 DAG.getConstant(1, DL, MVT::i32));
15721 return SDValue();
15722}
15723
15724/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15725/// ISD::VECTOR_SHUFFLE.
15727 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15728 return R;
15729
15730 // The LLVM shufflevector instruction does not require the shuffle mask
15731 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15732 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15733 // operands do not match the mask length, they are extended by concatenating
15734 // them with undef vectors. That is probably the right thing for other
15735 // targets, but for NEON it is better to concatenate two double-register
15736 // size vector operands into a single quad-register size vector. Do that
15737 // transformation here:
15738 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15739 // shuffle(concat(v1, v2), undef)
15740 SDValue Op0 = N->getOperand(0);
15741 SDValue Op1 = N->getOperand(1);
15742 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15743 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15744 Op0.getNumOperands() != 2 ||
15745 Op1.getNumOperands() != 2)
15746 return SDValue();
15747 SDValue Concat0Op1 = Op0.getOperand(1);
15748 SDValue Concat1Op1 = Op1.getOperand(1);
15749 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15750 return SDValue();
15751 // Skip the transformation if any of the types are illegal.
15752 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15753 EVT VT = N->getValueType(0);
15754 if (!TLI.isTypeLegal(VT) ||
15755 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15756 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15757 return SDValue();
15758
15759 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15760 Op0.getOperand(0), Op1.getOperand(0));
15761 // Translate the shuffle mask.
15762 SmallVector<int, 16> NewMask;
15763 unsigned NumElts = VT.getVectorNumElements();
15764 unsigned HalfElts = NumElts/2;
15765 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15766 for (unsigned n = 0; n < NumElts; ++n) {
15767 int MaskElt = SVN->getMaskElt(n);
15768 int NewElt = -1;
15769 if (MaskElt < (int)HalfElts)
15770 NewElt = MaskElt;
15771 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15772 NewElt = HalfElts + MaskElt - NumElts;
15773 NewMask.push_back(NewElt);
15774 }
15775 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15776 DAG.getUNDEF(VT), NewMask);
15777}
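// Illustrative standalone sketch (not part of this file; compile separately):
// the shuffle-mask translation above, for 4-element halves inside an 8-element
// shuffle. Lanes of the first concat(v1, undef) keep their index, lanes of the
// second concat(v2, undef) are re-based onto the upper half of the new
// concat(v1, v2), and anything that referred to an undef half becomes -1. The
// mask values below are made up for illustration.
#include <cassert>
#include <vector>

int main() {
  const int NumElts = 8, HalfElts = NumElts / 2;
  std::vector<int> OldMask = {0, 9, 1, 8, 6, 12, 2, 11}; // 6 and 12 point at undef lanes
  std::vector<int> NewMask;
  for (int MaskElt : OldMask) {
    int NewElt = -1;
    if (MaskElt >= 0 && MaskElt < HalfElts)
      NewElt = MaskElt;                      // a lane of v1
    else if (MaskElt >= NumElts && MaskElt < NumElts + HalfElts)
      NewElt = HalfElts + MaskElt - NumElts; // a lane of v2, re-based
    NewMask.push_back(NewElt);
  }
  assert((NewMask == std::vector<int>{0, 5, 1, 4, -1, -1, 2, 7}));
}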
15778
15779/// Load/store instruction that can be merged with a base address
15780/// update
15785 unsigned AddrOpIdx;
15786};
15787
15789 /// Instruction that updates a pointer
15791 /// Pointer increment operand
15793 /// Pointer increment value if it is a constant, or 0 otherwise
15794 unsigned ConstInc;
15795};
15796
15798 struct BaseUpdateUser &User,
15799 bool SimpleConstIncOnly,
15801 SelectionDAG &DAG = DCI.DAG;
15802 SDNode *N = Target.N;
15803 MemSDNode *MemN = cast<MemSDNode>(N);
15804 SDLoc dl(N);
15805
15806 // Find the new opcode for the updating load/store.
15807 bool isLoadOp = true;
15808 bool isLaneOp = false;
15809 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15810 // as an operand.
15811 bool hasAlignment = true;
15812 unsigned NewOpc = 0;
15813 unsigned NumVecs = 0;
15814 if (Target.isIntrinsic) {
15815 unsigned IntNo = N->getConstantOperandVal(1);
15816 switch (IntNo) {
15817 default:
15818 llvm_unreachable("unexpected intrinsic for Neon base update");
15819 case Intrinsic::arm_neon_vld1:
15820 NewOpc = ARMISD::VLD1_UPD;
15821 NumVecs = 1;
15822 break;
15823 case Intrinsic::arm_neon_vld2:
15824 NewOpc = ARMISD::VLD2_UPD;
15825 NumVecs = 2;
15826 break;
15827 case Intrinsic::arm_neon_vld3:
15828 NewOpc = ARMISD::VLD3_UPD;
15829 NumVecs = 3;
15830 break;
15831 case Intrinsic::arm_neon_vld4:
15832 NewOpc = ARMISD::VLD4_UPD;
15833 NumVecs = 4;
15834 break;
15835 case Intrinsic::arm_neon_vld1x2:
15836 NewOpc = ARMISD::VLD1x2_UPD;
15837 NumVecs = 2;
15838 hasAlignment = false;
15839 break;
15840 case Intrinsic::arm_neon_vld1x3:
15841 NewOpc = ARMISD::VLD1x3_UPD;
15842 NumVecs = 3;
15843 hasAlignment = false;
15844 break;
15845 case Intrinsic::arm_neon_vld1x4:
15846 NewOpc = ARMISD::VLD1x4_UPD;
15847 NumVecs = 4;
15848 hasAlignment = false;
15849 break;
15850 case Intrinsic::arm_neon_vld2dup:
15851 NewOpc = ARMISD::VLD2DUP_UPD;
15852 NumVecs = 2;
15853 break;
15854 case Intrinsic::arm_neon_vld3dup:
15855 NewOpc = ARMISD::VLD3DUP_UPD;
15856 NumVecs = 3;
15857 break;
15858 case Intrinsic::arm_neon_vld4dup:
15859 NewOpc = ARMISD::VLD4DUP_UPD;
15860 NumVecs = 4;
15861 break;
15862 case Intrinsic::arm_neon_vld2lane:
15863 NewOpc = ARMISD::VLD2LN_UPD;
15864 NumVecs = 2;
15865 isLaneOp = true;
15866 break;
15867 case Intrinsic::arm_neon_vld3lane:
15868 NewOpc = ARMISD::VLD3LN_UPD;
15869 NumVecs = 3;
15870 isLaneOp = true;
15871 break;
15872 case Intrinsic::arm_neon_vld4lane:
15873 NewOpc = ARMISD::VLD4LN_UPD;
15874 NumVecs = 4;
15875 isLaneOp = true;
15876 break;
15877 case Intrinsic::arm_neon_vst1:
15878 NewOpc = ARMISD::VST1_UPD;
15879 NumVecs = 1;
15880 isLoadOp = false;
15881 break;
15882 case Intrinsic::arm_neon_vst2:
15883 NewOpc = ARMISD::VST2_UPD;
15884 NumVecs = 2;
15885 isLoadOp = false;
15886 break;
15887 case Intrinsic::arm_neon_vst3:
15888 NewOpc = ARMISD::VST3_UPD;
15889 NumVecs = 3;
15890 isLoadOp = false;
15891 break;
15892 case Intrinsic::arm_neon_vst4:
15893 NewOpc = ARMISD::VST4_UPD;
15894 NumVecs = 4;
15895 isLoadOp = false;
15896 break;
15897 case Intrinsic::arm_neon_vst2lane:
15898 NewOpc = ARMISD::VST2LN_UPD;
15899 NumVecs = 2;
15900 isLoadOp = false;
15901 isLaneOp = true;
15902 break;
15903 case Intrinsic::arm_neon_vst3lane:
15904 NewOpc = ARMISD::VST3LN_UPD;
15905 NumVecs = 3;
15906 isLoadOp = false;
15907 isLaneOp = true;
15908 break;
15909 case Intrinsic::arm_neon_vst4lane:
15910 NewOpc = ARMISD::VST4LN_UPD;
15911 NumVecs = 4;
15912 isLoadOp = false;
15913 isLaneOp = true;
15914 break;
15915 case Intrinsic::arm_neon_vst1x2:
15916 NewOpc = ARMISD::VST1x2_UPD;
15917 NumVecs = 2;
15918 isLoadOp = false;
15919 hasAlignment = false;
15920 break;
15921 case Intrinsic::arm_neon_vst1x3:
15922 NewOpc = ARMISD::VST1x3_UPD;
15923 NumVecs = 3;
15924 isLoadOp = false;
15925 hasAlignment = false;
15926 break;
15927 case Intrinsic::arm_neon_vst1x4:
15928 NewOpc = ARMISD::VST1x4_UPD;
15929 NumVecs = 4;
15930 isLoadOp = false;
15931 hasAlignment = false;
15932 break;
15933 }
15934 } else {
15935 isLaneOp = true;
15936 switch (N->getOpcode()) {
15937 default:
15938 llvm_unreachable("unexpected opcode for Neon base update");
15939 case ARMISD::VLD1DUP:
15940 NewOpc = ARMISD::VLD1DUP_UPD;
15941 NumVecs = 1;
15942 break;
15943 case ARMISD::VLD2DUP:
15944 NewOpc = ARMISD::VLD2DUP_UPD;
15945 NumVecs = 2;
15946 break;
15947 case ARMISD::VLD3DUP:
15948 NewOpc = ARMISD::VLD3DUP_UPD;
15949 NumVecs = 3;
15950 break;
15951 case ARMISD::VLD4DUP:
15952 NewOpc = ARMISD::VLD4DUP_UPD;
15953 NumVecs = 4;
15954 break;
15955 case ISD::LOAD:
15956 NewOpc = ARMISD::VLD1_UPD;
15957 NumVecs = 1;
15958 isLaneOp = false;
15959 break;
15960 case ISD::STORE:
15961 NewOpc = ARMISD::VST1_UPD;
15962 NumVecs = 1;
15963 isLaneOp = false;
15964 isLoadOp = false;
15965 break;
15966 }
15967 }
15968
15969 // Find the size of memory referenced by the load/store.
15970 EVT VecTy;
15971 if (isLoadOp) {
15972 VecTy = N->getValueType(0);
15973 } else if (Target.isIntrinsic) {
15974 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15975 } else {
15976 assert(Target.isStore &&
15977 "Node has to be a load, a store, or an intrinsic!");
15978 VecTy = N->getOperand(1).getValueType();
15979 }
15980
15981 bool isVLDDUPOp =
15982 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15983 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15984
15985 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15986 if (isLaneOp || isVLDDUPOp)
15987 NumBytes /= VecTy.getVectorNumElements();
15988
15989 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15990 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15991 // separate instructions that make it harder to use a non-constant update.
15992 return false;
15993 }
15994
15995 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15996 return false;
15997
15998 // OK, we found an ADD we can fold into the base update.
15999 // Now, create a _UPD node, taking care of not breaking alignment.
16000
16001 EVT AlignedVecTy = VecTy;
16002 Align Alignment = MemN->getAlign();
16003
16004 // If this is a less-than-standard-aligned load/store, change the type to
16005 // match the standard alignment.
16006 // The alignment is overlooked when selecting _UPD variants, and it's
16007 // easier to introduce bitcasts here than fix that.
16008 // There are 3 ways to get to this base-update combine:
16009 // - intrinsics: they are assumed to be properly aligned (to the standard
16010 // alignment of the memory type), so we don't need to do anything.
16011 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16012 // intrinsics, so, likewise, there's nothing to do.
16013 // - generic load/store instructions: the alignment is specified as an
16014 // explicit operand, rather than implicitly as the standard alignment
16015 // of the memory type (like the intrinsics). We need to change the
16016 // memory type to match the explicit alignment. That way, we don't
16017 // generate non-standard-aligned ARMISD::VLDx nodes.
16018 if (isa<LSBaseSDNode>(N)) {
16019 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
16020 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
16021 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16022 assert(!isLaneOp && "Unexpected generic load/store lane.");
16023 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
16024 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
16025 }
16026 // Don't set an explicit alignment on regular load/stores that we want
16027 // to transform to VLD/VST 1_UPD nodes.
16028 // This matches the behavior of regular load/stores, which only get an
16029 // explicit alignment if the MMO alignment is larger than the standard
16030 // alignment of the memory type.
16031 // Intrinsics, however, always get an explicit alignment, set to the
16032 // alignment of the MMO.
16033 Alignment = Align(1);
16034 }
16035
16036 // Create the new updating load/store node.
16037 // First, create an SDVTList for the new updating node's results.
16038 EVT Tys[6];
16039 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16040 unsigned n;
16041 for (n = 0; n < NumResultVecs; ++n)
16042 Tys[n] = AlignedVecTy;
16043 Tys[n++] = MVT::i32;
16044 Tys[n] = MVT::Other;
16045 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16046
16047 // Then, gather the new node's operands.
16049 Ops.push_back(N->getOperand(0)); // incoming chain
16050 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16051 Ops.push_back(User.Inc);
16052
16053 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16054 // Try to match the intrinsic's signature
16055 Ops.push_back(StN->getValue());
16056 } else {
16057 // Loads (and of course intrinsics) match the intrinsics' signature,
16058 // so just add all but the alignment operand.
16059 unsigned LastOperand =
16060 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16061 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16062 Ops.push_back(N->getOperand(i));
16063 }
16064
16065 // For all node types, the alignment operand is always the last one.
16066 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16067
16068 // If this is a non-standard-aligned STORE, the penultimate operand is the
16069 // stored value. Bitcast it to the aligned type.
16070 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16071 SDValue &StVal = Ops[Ops.size() - 2];
16072 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16073 }
16074
16075 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16076 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16077 MemN->getMemOperand());
16078
16079 // Update the uses.
16080 SmallVector<SDValue, 5> NewResults;
16081 for (unsigned i = 0; i < NumResultVecs; ++i)
16082 NewResults.push_back(SDValue(UpdN.getNode(), i));
16083
16084 // If this is a non-standard-aligned LOAD, the first result is the loaded
16085 // value. Bitcast it to the expected result type.
16086 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16087 SDValue &LdVal = NewResults[0];
16088 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16089 }
16090
16091 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16092 DCI.CombineTo(N, NewResults);
16093 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16094
16095 return true;
16096}
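// Illustrative standalone sketch (not part of this file; compile separately):
// the "increment must match the access size" rule used above. The constant
// post-increment folded into a _UPD node is NumVecs * sizeof(vector) bytes,
// divided by the element count for lane/dup forms, which only touch one
// element per register. The figures below are illustrative.
#include <cassert>

int main() {
  unsigned NumVecs = 4, VecBits = 128, NumElts = 4; // e.g. a vld4.32 of 128-bit vectors
  unsigned NumBytes = NumVecs * VecBits / 8;
  assert(NumBytes == 64);                  // full vld4/vst4 post-increment
  unsigned LaneBytes = NumBytes / NumElts;
  assert(LaneBytes == 16);                 // vld4lane/vst4lane post-increment
}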
16097
16098 // If (opcode ptr inc) is an ADD-like instruction, return the
16099// increment value. Otherwise return 0.
16100static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16101 SDValue Inc, const SelectionDAG &DAG) {
16102 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16103 if (!CInc)
16104 return 0;
16105
16106 switch (Opcode) {
16107 case ARMISD::VLD1_UPD:
16108 case ISD::ADD:
16109 return CInc->getZExtValue();
16110 case ISD::OR: {
16111 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16112 // (OR ptr inc) is the same as (ADD ptr inc)
16113 return CInc->getZExtValue();
16114 }
16115 return 0;
16116 }
16117 default:
16118 return 0;
16119 }
16120}
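// Illustrative standalone sketch (not part of this file; compile separately):
// why the ISD::OR case above may be treated as a pointer increment. When the
// base and the increment share no set bits there is no carry, so OR and ADD
// agree -- the property DAG.haveNoCommonBitsSet() checks on SDValues.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Ptr = 0xffff0000u, Inc = 16; // an aligned base and a small offset
  assert((Ptr & Inc) == 0);             // no common bits...
  assert((Ptr | Inc) == Ptr + Inc);     // ...so (or ptr, inc) == (add ptr, inc)
}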
16121
16123 switch (N->getOpcode()) {
16124 case ISD::ADD:
16125 case ISD::OR: {
16126 if (isa<ConstantSDNode>(N->getOperand(1))) {
16127 *Ptr = N->getOperand(0);
16128 *CInc = N->getOperand(1);
16129 return true;
16130 }
16131 return false;
16132 }
16133 case ARMISD::VLD1_UPD: {
16134 if (isa<ConstantSDNode>(N->getOperand(2))) {
16135 *Ptr = N->getOperand(1);
16136 *CInc = N->getOperand(2);
16137 return true;
16138 }
16139 return false;
16140 }
16141 default:
16142 return false;
16143 }
16144}
16145
16147 // Check that the add is independent of the load/store.
16148 // Otherwise, folding it would create a cycle. Search through Addr
16149 // as well, since the User may not be a direct user of Addr and may
16150 // only share a base pointer with it.
16153 Worklist.push_back(N);
16154 Worklist.push_back(User);
16155 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16156 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16157 return false;
16158 return true;
16159}
16160
16161/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16162/// NEON load/store intrinsics, and generic vector load/stores, to merge
16163/// base address updates.
16164/// For generic load/stores, the memory type is assumed to be a vector.
16165/// The caller is assumed to have checked legality.
16168 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16169 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16170 const bool isStore = N->getOpcode() == ISD::STORE;
16171 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16172 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16173
16174 SDValue Addr = N->getOperand(AddrOpIdx);
16175
16177
16178 // Search for a use of the address operand that is an increment.
16179 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16180 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16181 SDNode *User = *UI;
16182 if (UI.getUse().getResNo() != Addr.getResNo() ||
16183 User->getNumOperands() != 2)
16184 continue;
16185
16186 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16187 unsigned ConstInc =
16188 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16189
16190 if (ConstInc || User->getOpcode() == ISD::ADD)
16191 BaseUpdates.push_back({User, Inc, ConstInc});
16192 }
16193
16194 // If the address is a constant pointer increment itself, find
16195 // another constant increment that has the same base operand.
16196 SDValue Base;
16197 SDValue CInc;
16198 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16199 unsigned Offset =
16200 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16201 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16202 UI != UE; ++UI) {
16203
16204 SDNode *User = *UI;
16205 if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
16206 User->getNumOperands() != 2)
16207 continue;
16208
16209 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16210 unsigned UserOffset =
16211 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16212
16213 if (!UserOffset || UserOffset <= Offset)
16214 continue;
16215
16216 unsigned NewConstInc = UserOffset - Offset;
16217 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16218 BaseUpdates.push_back({User, NewInc, NewConstInc});
16219 }
16220 }
16221
16222 // Try to fold the load/store with an update that matches memory
16223 // access size. This should work well for sequential loads.
16224 //
16225 // Filter out invalid updates as well.
16226 unsigned NumValidUpd = BaseUpdates.size();
16227 for (unsigned I = 0; I < NumValidUpd;) {
16228 BaseUpdateUser &User = BaseUpdates[I];
16229 if (!isValidBaseUpdate(N, User.N)) {
16230 --NumValidUpd;
16231 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16232 continue;
16233 }
16234
16235 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16236 return SDValue();
16237 ++I;
16238 }
16239 BaseUpdates.resize(NumValidUpd);
16240
16241 // Try to fold with other users. Non-constant updates are considered
16242 // first, and constant updates are sorted to not break a sequence of
16243 // strided accesses (if there is any).
16244 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16245 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16246 return LHS.ConstInc < RHS.ConstInc;
16247 });
16248 for (BaseUpdateUser &User : BaseUpdates) {
16249 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16250 return SDValue();
16251 }
16252 return SDValue();
16253}
16254
16257 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16258 return SDValue();
16259
16260 return CombineBaseUpdate(N, DCI);
16261}
16262
16265 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16266 return SDValue();
16267
16268 SelectionDAG &DAG = DCI.DAG;
16269 SDValue Addr = N->getOperand(2);
16270 MemSDNode *MemN = cast<MemSDNode>(N);
16271 SDLoc dl(N);
16272
16273 // For the stores, where there are multiple intrinsics, we only actually want
16274 // to post-inc the last of them.
16275 unsigned IntNo = N->getConstantOperandVal(1);
16276 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16277 return SDValue();
16278 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16279 return SDValue();
16280
16281 // Search for a use of the address operand that is an increment.
16282 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16283 UE = Addr.getNode()->use_end();
16284 UI != UE; ++UI) {
16285 SDNode *User = *UI;
16286 if (User->getOpcode() != ISD::ADD ||
16287 UI.getUse().getResNo() != Addr.getResNo())
16288 continue;
16289
16290 // Check that the add is independent of the load/store. Otherwise, folding
16291 // it would create a cycle. We can avoid searching through Addr as it's a
16292 // predecessor to both.
16293 SmallPtrSet<const SDNode *, 32> Visited;
16294 SmallVector<const SDNode *, 16> Worklist;
16295 Visited.insert(Addr.getNode());
16296 Worklist.push_back(N);
16297 Worklist.push_back(User);
16298 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16299 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16300 continue;
16301
16302 // Find the new opcode for the updating load/store.
16303 bool isLoadOp = true;
16304 unsigned NewOpc = 0;
16305 unsigned NumVecs = 0;
16306 switch (IntNo) {
16307 default:
16308 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16309 case Intrinsic::arm_mve_vld2q:
16310 NewOpc = ARMISD::VLD2_UPD;
16311 NumVecs = 2;
16312 break;
16313 case Intrinsic::arm_mve_vld4q:
16314 NewOpc = ARMISD::VLD4_UPD;
16315 NumVecs = 4;
16316 break;
16317 case Intrinsic::arm_mve_vst2q:
16318 NewOpc = ARMISD::VST2_UPD;
16319 NumVecs = 2;
16320 isLoadOp = false;
16321 break;
16322 case Intrinsic::arm_mve_vst4q:
16323 NewOpc = ARMISD::VST4_UPD;
16324 NumVecs = 4;
16325 isLoadOp = false;
16326 break;
16327 }
16328
16329 // Find the size of memory referenced by the load/store.
16330 EVT VecTy;
16331 if (isLoadOp) {
16332 VecTy = N->getValueType(0);
16333 } else {
16334 VecTy = N->getOperand(3).getValueType();
16335 }
16336
16337 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16338
16339 // If the increment is a constant, it must match the memory ref size.
16340 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16341 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16342 if (!CInc || CInc->getZExtValue() != NumBytes)
16343 continue;
16344
16345 // Create the new updating load/store node.
16346 // First, create an SDVTList for the new updating node's results.
16347 EVT Tys[6];
16348 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16349 unsigned n;
16350 for (n = 0; n < NumResultVecs; ++n)
16351 Tys[n] = VecTy;
16352 Tys[n++] = MVT::i32;
16353 Tys[n] = MVT::Other;
16354 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16355
16356 // Then, gather the new node's operands.
16357 SmallVector<SDValue, 8> Ops;
16358 Ops.push_back(N->getOperand(0)); // incoming chain
16359 Ops.push_back(N->getOperand(2)); // ptr
16360 Ops.push_back(Inc);
16361
16362 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16363 Ops.push_back(N->getOperand(i));
16364
16365 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16366 MemN->getMemOperand());
16367
16368 // Update the uses.
16369 SmallVector<SDValue, 5> NewResults;
16370 for (unsigned i = 0; i < NumResultVecs; ++i)
16371 NewResults.push_back(SDValue(UpdN.getNode(), i));
16372
16373 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16374 DCI.CombineTo(N, NewResults);
16375 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16376
16377 break;
16378 }
16379
16380 return SDValue();
16381}
16382
16383/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16384/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16385/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16386/// return true.
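 // As a sketch, a vld2-lane whose every result feeds a VDUPLANE of the same
 // lane, e.g.
 //   %v = arm.neon.vld2lane(..., lane 1)
 //   %a = VDUPLANE %v.0, 1
 //   %b = VDUPLANE %v.1, 1
 // only needs that one element from memory, so it can be rebuilt as a VLD2DUP.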
16387 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16388 SelectionDAG &DAG = DCI.DAG;
16389 EVT VT = N->getValueType(0);
16390 // vldN-dup instructions only support 64-bit vectors for N > 1.
16391 if (!VT.is64BitVector())
16392 return false;
16393
16394 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16395 SDNode *VLD = N->getOperand(0).getNode();
16396 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16397 return false;
16398 unsigned NumVecs = 0;
16399 unsigned NewOpc = 0;
16400 unsigned IntNo = VLD->getConstantOperandVal(1);
16401 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16402 NumVecs = 2;
16403 NewOpc = ARMISD::VLD2DUP;
16404 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16405 NumVecs = 3;
16406 NewOpc = ARMISD::VLD3DUP;
16407 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16408 NumVecs = 4;
16409 NewOpc = ARMISD::VLD4DUP;
16410 } else {
16411 return false;
16412 }
16413
16414 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16415 // numbers match the load.
16416 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16417 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16418 UI != UE; ++UI) {
16419 // Ignore uses of the chain result.
16420 if (UI.getUse().getResNo() == NumVecs)
16421 continue;
16422 SDNode *User = *UI;
16423 if (User->getOpcode() != ARMISD::VDUPLANE ||
16424 VLDLaneNo != User->getConstantOperandVal(1))
16425 return false;
16426 }
16427
16428 // Create the vldN-dup node.
16429 EVT Tys[5];
16430 unsigned n;
16431 for (n = 0; n < NumVecs; ++n)
16432 Tys[n] = VT;
16433 Tys[n] = MVT::Other;
16434 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16435 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16436 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16437 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16438 Ops, VLDMemInt->getMemoryVT(),
16439 VLDMemInt->getMemOperand());
16440
16441 // Update the uses.
16442 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16443 UI != UE; ++UI) {
16444 unsigned ResNo = UI.getUse().getResNo();
16445 // Ignore uses of the chain result.
16446 if (ResNo == NumVecs)
16447 continue;
16448 SDNode *User = *UI;
16449 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16450 }
16451
16452 // Now the vldN-lane intrinsic is dead except for its chain result.
16453 // Update uses of the chain.
16454 std::vector<SDValue> VLDDupResults;
16455 for (unsigned n = 0; n < NumVecs; ++n)
16456 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16457 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16458 DCI.CombineTo(VLD, VLDDupResults);
16459
16460 return true;
16461}
16462
16463/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16464/// ARMISD::VDUPLANE.
16465 static SDValue PerformVDUPLANECombine(SDNode *N,
16466 TargetLowering::DAGCombinerInfo &DCI,
16467 const ARMSubtarget *Subtarget) {
16468 SDValue Op = N->getOperand(0);
16469 EVT VT = N->getValueType(0);
16470
16471 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16472 if (Subtarget->hasMVEIntegerOps()) {
16473 EVT ExtractVT = VT.getVectorElementType();
16474 // We need to ensure we are creating a legal type.
16475 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16476 ExtractVT = MVT::i32;
16477 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16478 N->getOperand(0), N->getOperand(1));
16479 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16480 }
16481
16482 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16483 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16484 if (CombineVLDDUP(N, DCI))
16485 return SDValue(N, 0);
16486
16487 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16488 // redundant. Ignore bit_converts for now; element sizes are checked below.
16489 while (Op.getOpcode() == ISD::BITCAST)
16490 Op = Op.getOperand(0);
16491 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16492 return SDValue();
16493
16494 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16495 unsigned EltSize = Op.getScalarValueSizeInBits();
16496 // The canonical VMOV for a zero vector uses a 32-bit element size.
16497 unsigned Imm = Op.getConstantOperandVal(0);
16498 unsigned EltBits;
16499 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16500 EltSize = 8;
16501 if (EltSize > VT.getScalarSizeInBits())
16502 return SDValue();
16503
16504 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16505}
16506
16507/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16508 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16509 const ARMSubtarget *Subtarget) {
16510 SDValue Op = N->getOperand(0);
16511 SDLoc dl(N);
16512
16513 if (Subtarget->hasMVEIntegerOps()) {
16514 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16515 // need to come from a GPR.
16516 if (Op.getValueType() == MVT::f32)
16517 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16518 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16519 else if (Op.getValueType() == MVT::f16)
16520 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16521 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16522 }
16523
16524 if (!Subtarget->hasNEON())
16525 return SDValue();
16526
16527 // Match VDUP(LOAD) -> VLD1DUP.
16528 // We match this pattern here rather than waiting for isel because the
16529 // transform is only legal for unindexed loads.
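 // Sketch: (ARMISD::VDUP (load i16 %p)), where the load value has no other
 // users, becomes a VLD1DUP from %p, and the load's chain result is rewired
 // to the new node.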
16530 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16531 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16532 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16533 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16534 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16535 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16536 SDValue VLDDup =
16537 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16538 LD->getMemoryVT(), LD->getMemOperand());
16539 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16540 return VLDDup;
16541 }
16542
16543 return SDValue();
16544}
16545
16546 static SDValue PerformLOADCombine(SDNode *N,
16547 TargetLowering::DAGCombinerInfo &DCI,
16548 const ARMSubtarget *Subtarget) {
16549 EVT VT = N->getValueType(0);
16550
16551 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16552 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16553 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16554 return CombineBaseUpdate(N, DCI);
16555
16556 return SDValue();
16557}
16558
16559// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16560// pack all of the elements in one place. Next, store to memory in fewer
16561// chunks.
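 // Sketch of the idea for a v4i32 -> v4i8 truncating store: bitcast to v16i8,
 // shuffle the four live bytes down to the bottom of the register, then write
 // them out with one 32-bit integer store instead of four lane stores.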
16562 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16563 SelectionDAG &DAG) {
16564 SDValue StVal = St->getValue();
16565 EVT VT = StVal.getValueType();
16566 if (!St->isTruncatingStore() || !VT.isVector())
16567 return SDValue();
16568 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16569 EVT StVT = St->getMemoryVT();
16570 unsigned NumElems = VT.getVectorNumElements();
16571 assert(StVT != VT && "Cannot truncate to the same type");
16572 unsigned FromEltSz = VT.getScalarSizeInBits();
16573 unsigned ToEltSz = StVT.getScalarSizeInBits();
16574
16575 // From, To sizes and ElemCount must be pow of two
16576 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16577 return SDValue();
16578
16579 // We are going to use the original vector elt for storing.
16580 // Accumulated smaller vector elements must be a multiple of the store size.
16581 if (0 != (NumElems * FromEltSz) % ToEltSz)
16582 return SDValue();
16583
16584 unsigned SizeRatio = FromEltSz / ToEltSz;
16585 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16586
16587 // Create a type on which we perform the shuffle.
16588 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16589 NumElems * SizeRatio);
16590 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16591
16592 SDLoc DL(St);
16593 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16594 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16595 for (unsigned i = 0; i < NumElems; ++i)
16596 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16597 : i * SizeRatio;
16598
16599 // Can't shuffle using an illegal type.
16600 if (!TLI.isTypeLegal(WideVecVT))
16601 return SDValue();
16602
16603 SDValue Shuff = DAG.getVectorShuffle(
16604 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16605 // At this point all of the data is stored at the bottom of the
16606 // register. We now need to save it to mem.
16607
16608 // Find the largest store unit
16609 MVT StoreType = MVT::i8;
16610 for (MVT Tp : MVT::integer_valuetypes()) {
16611 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16612 StoreType = Tp;
16613 }
16614 // Didn't find a legal store type.
16615 if (!TLI.isTypeLegal(StoreType))
16616 return SDValue();
16617
16618 // Bitcast the original vector into a vector of store-size units
16619 EVT StoreVecVT =
16620 EVT::getVectorVT(*DAG.getContext(), StoreType,
16621 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16622 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16623 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16624 SmallVector<SDValue, 8> Chains;
16625 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16626 TLI.getPointerTy(DAG.getDataLayout()));
16627 SDValue BasePtr = St->getBasePtr();
16628
16629 // Perform one or more big stores into memory.
16630 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16631 for (unsigned I = 0; I < E; I++) {
16632 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16633 ShuffWide, DAG.getIntPtrConstant(I, DL));
16634 SDValue Ch =
16635 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16636 St->getAlign(), St->getMemOperand()->getFlags());
16637 BasePtr =
16638 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16639 Chains.push_back(Ch);
16640 }
16641 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16642}
16643
16644// Try taking a single vector store from an fpround (which would otherwise turn
16645// into an expensive buildvector) and splitting it into a series of narrowing
16646// stores.
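 // Sketch: a store of (fpround v8f32 %x to v8f16) is split into two halves;
 // each v4f32 half is narrowed with VCVTN and written with a truncating store
 // of 8 bytes at the matching offset.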
16647 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16648 SelectionDAG &DAG) {
16649 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16650 return SDValue();
16651 SDValue Trunc = St->getValue();
16652 if (Trunc->getOpcode() != ISD::FP_ROUND)
16653 return SDValue();
16654 EVT FromVT = Trunc->getOperand(0).getValueType();
16655 EVT ToVT = Trunc.getValueType();
16656 if (!ToVT.isVector())
16657 return SDValue();
16659 EVT ToEltVT = ToVT.getVectorElementType();
16660 EVT FromEltVT = FromVT.getVectorElementType();
16661
16662 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16663 return SDValue();
16664
16665 unsigned NumElements = 4;
16666 if (FromVT.getVectorNumElements() % NumElements != 0)
16667 return SDValue();
16668
16669 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16670 // use the VMOVN over splitting the store. We are looking for patterns of:
16671 // !rev: 0 N 1 N+1 2 N+2 ...
16672 // rev: N 0 N+1 1 N+2 2 ...
16673 // The shuffle may either be a single source (in which case N = NumElts/2) or
16674 // two inputs extended with concat to the same size (in which case N =
16675 // NumElts).
16676 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16677 ArrayRef<int> M = SVN->getMask();
16678 unsigned NumElts = ToVT.getVectorNumElements();
16679 if (SVN->getOperand(1).isUndef())
16680 NumElts /= 2;
16681
16682 unsigned Off0 = Rev ? NumElts : 0;
16683 unsigned Off1 = Rev ? 0 : NumElts;
16684
16685 for (unsigned I = 0; I < NumElts; I += 2) {
16686 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16687 return false;
16688 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16689 return false;
16690 }
16691
16692 return true;
16693 };
16694
16695 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16696 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16697 return SDValue();
16698
16699 LLVMContext &C = *DAG.getContext();
16700 SDLoc DL(St);
16701 // Details about the old store
16702 SDValue Ch = St->getChain();
16703 SDValue BasePtr = St->getBasePtr();
16704 Align Alignment = St->getOriginalAlign();
16705 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16706 AAMDNodes AAInfo = St->getAAInfo();
16707
16708 // We split the store into slices of NumElements. fp16 trunc stores are vcvt'd
16709 // and then stored as truncating integer stores.
16710 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16711 EVT NewToVT = EVT::getVectorVT(
16712 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16713
16714 SmallVector<SDValue, 4> Stores;
16715 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16716 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16717 SDValue NewPtr =
16718 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16719
16720 SDValue Extract =
16721 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16722 DAG.getConstant(i * NumElements, DL, MVT::i32));
16723
16724 SDValue FPTrunc =
16725 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16726 Extract, DAG.getConstant(0, DL, MVT::i32));
16727 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16728
16729 SDValue Store = DAG.getTruncStore(
16730 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16731 NewToVT, Alignment, MMOFlags, AAInfo);
16732 Stores.push_back(Store);
16733 }
16734 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16735}
16736
16737// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16738// into an expensive buildvector) and splitting it into a series of narrowing
16739// stores.
16740 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16741 SelectionDAG &DAG) {
16742 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16743 return SDValue();
16744 SDValue Trunc = St->getValue();
16745 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16746 return SDValue();
16747 EVT FromVT = Trunc->getOperand(0).getValueType();
16748 EVT ToVT = Trunc.getValueType();
16749
16750 LLVMContext &C = *DAG.getContext();
16751 SDLoc DL(St);
16752 // Details about the old store
16753 SDValue Ch = St->getChain();
16754 SDValue BasePtr = St->getBasePtr();
16755 Align Alignment = St->getOriginalAlign();
16756 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16757 AAMDNodes AAInfo = St->getAAInfo();
16758
16759 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16760 FromVT.getVectorNumElements());
16761
16762 SmallVector<SDValue, 4> Stores;
16763 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16764 unsigned NewOffset =
16765 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16766 SDValue NewPtr =
16767 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16768
16769 SDValue Extract = Trunc.getOperand(i);
16770 SDValue Store = DAG.getTruncStore(
16771 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16772 NewToVT, Alignment, MMOFlags, AAInfo);
16773 Stores.push_back(Store);
16774 }
16775 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16776}
16777
16778// Given a floating point store from an extracted vector, with an integer
16779// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16780 // help reduce fp register pressure, avoids the fp extract and allows the use
16781 // of more integer post-inc stores, which are not available with vstr.
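 // Sketch: storing (f16 extractelement %v, n) when an i32
 // (ARMISD::VGETLANEu %v, n) already exists lets us reuse that integer lane
 // value and emit a truncating i16 store, with no fp extract at all.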
16782 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16783 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16784 return SDValue();
16785 SDValue Extract = St->getValue();
16786 EVT VT = Extract.getValueType();
16787 // For now this only handles f16. It may be useful for f32 too, but that would
16788 // be a bitcast(extract), not the VGETLANEu we check for here.
16789 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16790 return SDValue();
16791
16792 SDNode *GetLane =
16793 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16794 {Extract.getOperand(0), Extract.getOperand(1)});
16795 if (!GetLane)
16796 return SDValue();
16797
16798 LLVMContext &C = *DAG.getContext();
16799 SDLoc DL(St);
16800 // Create a new integer store to replace the existing floating point version.
16801 SDValue Ch = St->getChain();
16802 SDValue BasePtr = St->getBasePtr();
16803 Align Alignment = St->getOriginalAlign();
16804 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16805 AAMDNodes AAInfo = St->getAAInfo();
16806 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16807 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16808 St->getPointerInfo(), NewToVT, Alignment,
16809 MMOFlags, AAInfo);
16810
16811 return Store;
16812}
16813
16814/// PerformSTORECombine - Target-specific dag combine xforms for
16815/// ISD::STORE.
16816 static SDValue PerformSTORECombine(SDNode *N,
16817 TargetLowering::DAGCombinerInfo &DCI,
16818 const ARMSubtarget *Subtarget) {
16819 StoreSDNode *St = cast<StoreSDNode>(N);
16820 if (St->isVolatile())
16821 return SDValue();
16822 SDValue StVal = St->getValue();
16823 EVT VT = StVal.getValueType();
16824
16825 if (Subtarget->hasNEON())
16826 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16827 return Store;
16828
16829 if (Subtarget->hasMVEFloatOps())
16830 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16831 return NewToken;
16832
16833 if (Subtarget->hasMVEIntegerOps()) {
16834 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16835 return NewChain;
16836 if (SDValue NewToken =
16837 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16838 return NewToken;
16839 }
16840
16841 if (!ISD::isNormalStore(St))
16842 return SDValue();
16843
16844 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16845 // ARM stores of arguments in the same cache line.
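 // Sketch: store (ARMISD::VMOVDRR r0, r1) to [%p] becomes two GPR stores, one
 // word to [%p] and the other to [%p, #4], with the order of r0/r1 picked by
 // endianness, so no NEON d-register needs to be formed for the store.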
16846 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16847 StVal.getNode()->hasOneUse()) {
16848 SelectionDAG &DAG = DCI.DAG;
16849 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16850 SDLoc DL(St);
16851 SDValue BasePtr = St->getBasePtr();
16852 SDValue NewST1 = DAG.getStore(
16853 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16854 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16855 St->getMemOperand()->getFlags());
16856
16857 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16858 DAG.getConstant(4, DL, MVT::i32));
16859 return DAG.getStore(NewST1.getValue(0), DL,
16860 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16861 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16862 St->getOriginalAlign(),
16863 St->getMemOperand()->getFlags());
16864 }
16865
16866 if (StVal.getValueType() == MVT::i64 &&
16867 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16868
16869 // Bitcast an i64 store extracted from a vector to f64.
16870 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16871 SelectionDAG &DAG = DCI.DAG;
16872 SDLoc dl(StVal);
16873 SDValue IntVec = StVal.getOperand(0);
16874 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16875 IntVec.getValueType().getVectorNumElements());
16876 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16877 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16878 Vec, StVal.getOperand(1));
16879 dl = SDLoc(N);
16880 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16881 // Make the DAGCombiner fold the bitcasts.
16882 DCI.AddToWorklist(Vec.getNode());
16883 DCI.AddToWorklist(ExtElt.getNode());
16884 DCI.AddToWorklist(V.getNode());
16885 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16886 St->getPointerInfo(), St->getAlign(),
16887 St->getMemOperand()->getFlags(), St->getAAInfo());
16888 }
16889
16890 // If this is a legal vector store, try to combine it into a VST1_UPD.
16891 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16892 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16893 return CombineBaseUpdate(N, DCI);
16894
16895 return SDValue();
16896}
16897
16898/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16899/// can replace combinations of VMUL and VCVT (floating-point to integer)
16900/// when the VMUL has a constant operand that is a power of 2.
16901///
16902/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16903/// vmul.f32 d16, d17, d16
16904/// vcvt.s32.f32 d16, d16
16905/// becomes:
16906/// vcvt.s32.f32 d16, d16, #3
16907 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16908 const ARMSubtarget *Subtarget) {
16909 if (!Subtarget->hasNEON())
16910 return SDValue();
16911
16912 SDValue Op = N->getOperand(0);
16913 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16914 Op.getOpcode() != ISD::FMUL)
16915 return SDValue();
16916
16917 SDValue ConstVec = Op->getOperand(1);
16918 if (!isa<BuildVectorSDNode>(ConstVec))
16919 return SDValue();
16920
16921 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16922 uint32_t FloatBits = FloatTy.getSizeInBits();
16923 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16924 uint32_t IntBits = IntTy.getSizeInBits();
16925 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16926 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16927 // These instructions only exist converting from f32 to i32. We can handle
16928 // smaller integers by generating an extra truncate, but larger ones would
16929 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16930 // these instructions only support v2i32/v4i32 types.
16931 return SDValue();
16932 }
16933
16934 BitVector UndefElements;
16935 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16936 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16937 if (C == -1 || C == 0 || C > 32)
16938 return SDValue();
16939
16940 SDLoc dl(N);
16941 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16942 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16943 Intrinsic::arm_neon_vcvtfp2fxu;
16944 SDValue FixConv = DAG.getNode(
16945 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16946 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16947 DAG.getConstant(C, dl, MVT::i32));
16948
16949 if (IntBits < FloatBits)
16950 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16951
16952 return FixConv;
16953}
16954
16955 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16956 const ARMSubtarget *Subtarget) {
16957 if (!Subtarget->hasMVEFloatOps())
16958 return SDValue();
16959
16960 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16961 // The second form can be more easily turned into a predicated vadd, and
16962 // possibly combined into an fma to become a predicated vfma.
16963 SDValue Op0 = N->getOperand(0);
16964 SDValue Op1 = N->getOperand(1);
16965 EVT VT = N->getValueType(0);
16966 SDLoc DL(N);
16967
16968 // The identity element for an fadd is -0.0, or +0.0 when the nsz flag is set,
16969 // which is what these VMOVs represent.
16970 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16971 if (Op.getOpcode() != ISD::BITCAST ||
16972 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16973 return false;
16974 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16975 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16976 return true;
16977 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16978 return true;
16979 return false;
16980 };
16981
16982 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16983 std::swap(Op0, Op1);
16984
16985 if (Op1.getOpcode() != ISD::VSELECT)
16986 return SDValue();
16987
16988 SDNodeFlags FaddFlags = N->getFlags();
16989 bool NSZ = FaddFlags.hasNoSignedZeros();
16990 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16991 return SDValue();
16992
16993 SDValue FAdd =
16994 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16995 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16996}
16997
16998 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
16999 SDValue LHS = N->getOperand(0);
17000 SDValue RHS = N->getOperand(1);
17001 EVT VT = N->getValueType(0);
17002 SDLoc DL(N);
17003
17004 if (!N->getFlags().hasAllowReassociation())
17005 return SDValue();
17006
17007 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
17008 auto ReassocComplex = [&](SDValue A, SDValue B) {
17009 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
17010 return SDValue();
17011 unsigned Opc = A.getConstantOperandVal(0);
17012 if (Opc != Intrinsic::arm_mve_vcmlaq)
17013 return SDValue();
17014 SDValue VCMLA = DAG.getNode(
17015 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
17016 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17017 A.getOperand(3), A.getOperand(4));
17018 VCMLA->setFlags(A->getFlags());
17019 return VCMLA;
17020 };
17021 if (SDValue R = ReassocComplex(LHS, RHS))
17022 return R;
17023 if (SDValue R = ReassocComplex(RHS, LHS))
17024 return R;
17025
17026 return SDValue();
17027}
17028
17029 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
17030 const ARMSubtarget *Subtarget) {
17031 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17032 return S;
17033 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17034 return S;
17035 return SDValue();
17036}
17037
17038/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17039/// can replace combinations of VCVT (integer to floating-point) and VMUL
17040/// when the VMUL has a constant operand that is a power of 2.
17041///
17042/// Example (assume d17 = <float 0.125, float 0.125>):
17043/// vcvt.f32.s32 d16, d16
17044/// vmul.f32 d16, d16, d17
17045/// becomes:
17046/// vcvt.f32.s32 d16, d16, #3
17047 static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG,
17048 const ARMSubtarget *Subtarget) {
17049 if (!Subtarget->hasNEON())
17050 return SDValue();
17051
17052 SDValue Op = N->getOperand(0);
17053 unsigned OpOpcode = Op.getNode()->getOpcode();
17054 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17055 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17056 return SDValue();
17057
17058 SDValue ConstVec = N->getOperand(1);
17059 if (!isa<BuildVectorSDNode>(ConstVec))
17060 return SDValue();
17061
17062 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17063 uint32_t FloatBits = FloatTy.getSizeInBits();
17064 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17065 uint32_t IntBits = IntTy.getSizeInBits();
17066 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17067 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17068 // These instructions only exist converting from i32 to f32. We can handle
17069 // smaller integers by generating an extra extend, but larger ones would
17070 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17071 // these instructions only support v2i32/v4i32 types.
17072 return SDValue();
17073 }
17074
17075 ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);
17076 APFloat Recip(0.0f);
17077 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17078 return SDValue();
17079
17080 bool IsExact;
17081 APSInt IntVal(33);
17082 if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
17083 APFloat::opOK ||
17084 !IsExact)
17085 return SDValue();
17086
17087 int32_t C = IntVal.exactLogBase2();
17088 if (C == -1 || C == 0 || C > 32)
17089 return SDValue();
17090
17091 SDLoc DL(N);
17092 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17093 SDValue ConvInput = Op.getOperand(0);
17094 if (IntBits < FloatBits)
17095 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
17096 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
17097
17098 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
17099 : Intrinsic::arm_neon_vcvtfxu2fp;
17100 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
17101 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
17102 DAG.getConstant(C, DL, MVT::i32));
17103}
17104
17105 static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17106 const ARMSubtarget *ST) {
17107 if (!ST->hasMVEIntegerOps())
17108 return SDValue();
17109
17110 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17111 EVT ResVT = N->getValueType(0);
17112 SDValue N0 = N->getOperand(0);
17113 SDLoc dl(N);
17114
17115 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17116 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17117 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17118 N0.getValueType() == MVT::v16i8)) {
17119 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17120 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17121 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17122 }
17123
17124 // We are looking for something that will have illegal types if left alone,
17125 // but that we can convert to a single instruction under MVE. For example
17126 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17127 // or
17128 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17129
17130 // The legal cases are:
17131 // VADDV u/s 8/16/32
17132 // VMLAV u/s 8/16/32
17133 // VADDLV u/s 32
17134 // VMLALV u/s 16/32
17135
17136 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17137 // extend it and use v4i32 instead.
17138 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17139 EVT AVT = A.getValueType();
17140 return any_of(ExtTypes, [&](MVT Ty) {
17141 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17142 AVT.bitsLE(Ty);
17143 });
17144 };
17145 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17146 EVT AVT = A.getValueType();
17147 if (!AVT.is128BitVector())
17148 A = DAG.getNode(ExtendCode, dl,
17149 AVT.changeVectorElementType(MVT::getIntegerVT(
17150 128 / AVT.getVectorMinNumElements())),
17151 A);
17152 return A;
17153 };
17154 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17155 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17156 return SDValue();
17157 SDValue A = N0->getOperand(0);
17158 if (ExtTypeMatches(A, ExtTypes))
17159 return ExtendIfNeeded(A, ExtendCode);
17160 return SDValue();
17161 };
17162 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17163 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17164 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17165 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17166 return SDValue();
17167 Mask = N0->getOperand(0);
17168 SDValue Ext = N0->getOperand(1);
17169 if (Ext->getOpcode() != ExtendCode)
17170 return SDValue();
17171 SDValue A = Ext->getOperand(0);
17172 if (ExtTypeMatches(A, ExtTypes))
17173 return ExtendIfNeeded(A, ExtendCode);
17174 return SDValue();
17175 };
17176 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17177 SDValue &A, SDValue &B) {
17178 // For a vmla we are trying to match a larger pattern:
17179 // ExtA = sext/zext A
17180 // ExtB = sext/zext B
17181 // Mul = mul ExtA, ExtB
17182 // vecreduce.add Mul
17183 // There might also be an extra extend between the mul and the addreduce, so
17184 // long as the bitwidth is high enough to make them equivalent (for example the
17185 // original v8i16 might be multiplied at v8i32 and the reduce happen at v8i64).
17186 if (ResVT != RetTy)
17187 return false;
17188 SDValue Mul = N0;
17189 if (Mul->getOpcode() == ExtendCode &&
17190 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17191 ResVT.getScalarSizeInBits())
17192 Mul = Mul->getOperand(0);
17193 if (Mul->getOpcode() != ISD::MUL)
17194 return false;
17195 SDValue ExtA = Mul->getOperand(0);
17196 SDValue ExtB = Mul->getOperand(1);
17197 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17198 return false;
17199 A = ExtA->getOperand(0);
17200 B = ExtB->getOperand(0);
17201 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17202 A = ExtendIfNeeded(A, ExtendCode);
17203 B = ExtendIfNeeded(B, ExtendCode);
17204 return true;
17205 }
17206 return false;
17207 };
17208 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17209 SDValue &A, SDValue &B, SDValue &Mask) {
17210 // Same as the pattern above with a select for the zero predicated lanes
17211 // ExtA = sext/zext A
17212 // ExtB = sext/zext B
17213 // Mul = mul ExtA, ExtB
17214 // N0 = select Mask, Mul, 0
17215 // vecreduce.add N0
17216 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17217 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17218 return false;
17219 Mask = N0->getOperand(0);
17220 SDValue Mul = N0->getOperand(1);
17221 if (Mul->getOpcode() == ExtendCode &&
17222 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17223 ResVT.getScalarSizeInBits())
17224 Mul = Mul->getOperand(0);
17225 if (Mul->getOpcode() != ISD::MUL)
17226 return false;
17227 SDValue ExtA = Mul->getOperand(0);
17228 SDValue ExtB = Mul->getOperand(1);
17229 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17230 return false;
17231 A = ExtA->getOperand(0);
17232 B = ExtB->getOperand(0);
17233 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17234 A = ExtendIfNeeded(A, ExtendCode);
17235 B = ExtendIfNeeded(B, ExtendCode);
17236 return true;
17237 }
17238 return false;
17239 };
17240 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17241 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17242 // reductions. The operands are extended with MVEEXT, but as they are
17243 // reductions the lane orders do not matter. MVEEXT may be combined with
17244 // loads to produce two extending loads, or else they will be expanded to
17245 // VREV/VMOVL.
17246 EVT VT = Ops[0].getValueType();
17247 if (VT == MVT::v16i8) {
17248 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17249 "Unexpected illegal long reduction opcode");
17250 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17251
17252 SDValue Ext0 =
17253 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17254 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17255 SDValue Ext1 =
17256 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17257 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17258
17259 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17260 Ext0, Ext1);
17261 SDValue MLA1 =
17262 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17263 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17264 Ext0.getValue(1), Ext1.getValue(1));
17265 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17266 }
17267 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17268 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17269 SDValue(Node.getNode(), 1));
17270 };
17271
17272 SDValue A, B;
17273 SDValue Mask;
17274 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17275 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17276 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17277 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17278 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17279 A, B))
17280 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17281 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17282 A, B))
17283 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17284 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17285 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17286 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17287 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17288 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17289 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17290
17291 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17292 Mask))
17293 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17294 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17295 Mask))
17296 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17297 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17298 Mask))
17299 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17300 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17301 Mask))
17302 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17303 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17304 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17305 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17306 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17307 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17308 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17309
17310 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17311 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17312 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17313 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17314 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17315 return Create64bitNode(ARMISD::VADDLVs, {A});
17316 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17317 return Create64bitNode(ARMISD::VADDLVu, {A});
17318 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17319 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17320 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17321 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17322 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17323 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17324
17325 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17326 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17327 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17328 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17329 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17330 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17331 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17332 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17333 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17334 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17335 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17336 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17337 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17338 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17339
17340 // One complication: when the two inputs of the mul are the same, the sext of
17341 // the output will have been helpfully converted to a zext. Turn it back so
17342 // that the signed patterns above can match.
17343 SDValue Op = N0;
17344 if (Op->getOpcode() == ISD::VSELECT)
17345 Op = Op->getOperand(1);
17346 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17347 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17348 SDValue Mul = Op->getOperand(0);
17349 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17350 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17351 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17352 if (Op != N0)
17353 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17354 N0->getOperand(0), Ext, N0->getOperand(2));
17355 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17356 }
17357 }
17358
17359 return SDValue();
17360}
17361
17362// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17363// the lanes are used. Due to the reduction being commutative the shuffle can be
17364// removed.
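 // Sketch: VADDVu(vector_shuffle %x, undef, <3,2,1,0>) adds up exactly the
 // same lanes as VADDVu(%x), so once the mask is known to use every lane
 // exactly once the shuffle is simply bypassed.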
17365 static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17366 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17367 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17368 if (!Shuf || !Shuf->getOperand(1).isUndef())
17369 return SDValue();
17370
17371 // Check all elements are used once in the mask.
17372 ArrayRef<int> Mask = Shuf->getMask();
17373 APInt SetElts(Mask.size(), 0);
17374 for (int E : Mask) {
17375 if (E < 0 || E >= (int)Mask.size())
17376 return SDValue();
17377 SetElts.setBit(E);
17378 }
17379 if (!SetElts.isAllOnes())
17380 return SDValue();
17381
17382 if (N->getNumOperands() != VecOp + 1) {
17383 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17384 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17385 return SDValue();
17386 }
17387
17388 SmallVector<SDValue> Ops;
17389 for (SDValue Op : N->ops()) {
17390 if (Op.getValueType().isVector())
17391 Ops.push_back(Op.getOperand(0));
17392 else
17393 Ops.push_back(Op);
17394 }
17395 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17396}
17397
17398 static SDValue PerformVMOVNCombine(SDNode *N,
17399 TargetLowering::DAGCombinerInfo &DCI) {
17400 SDValue Op0 = N->getOperand(0);
17401 SDValue Op1 = N->getOperand(1);
17402 unsigned IsTop = N->getConstantOperandVal(2);
17403
17404 // VMOVNT a undef -> a
17405 // VMOVNB a undef -> a
17406 // VMOVNB undef a -> a
17407 if (Op1->isUndef())
17408 return Op0;
17409 if (Op0->isUndef() && !IsTop)
17410 return Op1;
17411
17412 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17413 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17414 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17415 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17416 Op1->getConstantOperandVal(2) == 0)
17417 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17418 Op0, Op1->getOperand(1), N->getOperand(2));
17419
17420 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17421 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17422 // into the top or bottom lanes.
17423 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17424 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17425 APInt Op0DemandedElts =
17426 IsTop ? Op1DemandedElts
17427 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17428
17429 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17430 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17431 return SDValue(N, 0);
17432 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17433 return SDValue(N, 0);
17434
17435 return SDValue();
17436}
17437
17438 static SDValue PerformVQMOVNCombine(SDNode *N,
17439 TargetLowering::DAGCombinerInfo &DCI) {
17440 SDValue Op0 = N->getOperand(0);
17441 unsigned IsTop = N->getConstantOperandVal(2);
17442
17443 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17444 APInt Op0DemandedElts =
17445 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17446 : APInt::getHighBitsSet(2, 1));
17447
17448 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17449 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17450 return SDValue(N, 0);
17451 return SDValue();
17452}
17453
17454 static SDValue PerformVQDMULHCombine(SDNode *N,
17455 TargetLowering::DAGCombinerInfo &DCI) {
17456 EVT VT = N->getValueType(0);
17457 SDValue LHS = N->getOperand(0);
17458 SDValue RHS = N->getOperand(1);
17459
17460 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17461 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17462 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17463 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17464 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17465 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17466 SDLoc DL(N);
17467 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17468 LHS.getOperand(0), RHS.getOperand(0));
17469 SDValue UndefV = LHS.getOperand(1);
17470 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17471 }
17472 return SDValue();
17473}
17474
17475 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17476 SDLoc DL(N);
17477 SDValue Op0 = N->getOperand(0);
17478 SDValue Op1 = N->getOperand(1);
17479
17480 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17481 // uses of the intrinsics.
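 // For example, LSLL %lo, %hi, #-12 becomes LSRL %lo, %hi, #12, and a shift
 // amount of 0 just forwards both operands unchanged.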
17482 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17483 int ShiftAmt = C->getSExtValue();
17484 if (ShiftAmt == 0) {
17485 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17486 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17487 return SDValue();
17488 }
17489
17490 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17491 unsigned NewOpcode =
17492 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17493 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17494 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17495 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17496 return NewShift;
17497 }
17498 }
17499
17500 return SDValue();
17501}
17502
17503/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17504 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17505 DAGCombinerInfo &DCI) const {
17506 SelectionDAG &DAG = DCI.DAG;
17507 unsigned IntNo = N->getConstantOperandVal(0);
17508 switch (IntNo) {
17509 default:
17510 // Don't do anything for most intrinsics.
17511 break;
17512
17513 // Vector shifts: check for immediate versions and lower them.
17514 // Note: This is done during DAG combining instead of DAG legalizing because
17515 // the build_vectors for 64-bit vector element shift counts are generally
17516 // not legal, and it is hard to see their values after they get legalized to
17517 // loads from a constant pool.
17518 case Intrinsic::arm_neon_vshifts:
17519 case Intrinsic::arm_neon_vshiftu:
17520 case Intrinsic::arm_neon_vrshifts:
17521 case Intrinsic::arm_neon_vrshiftu:
17522 case Intrinsic::arm_neon_vrshiftn:
17523 case Intrinsic::arm_neon_vqshifts:
17524 case Intrinsic::arm_neon_vqshiftu:
17525 case Intrinsic::arm_neon_vqshiftsu:
17526 case Intrinsic::arm_neon_vqshiftns:
17527 case Intrinsic::arm_neon_vqshiftnu:
17528 case Intrinsic::arm_neon_vqshiftnsu:
17529 case Intrinsic::arm_neon_vqrshiftns:
17530 case Intrinsic::arm_neon_vqrshiftnu:
17531 case Intrinsic::arm_neon_vqrshiftnsu: {
17532 EVT VT = N->getOperand(1).getValueType();
17533 int64_t Cnt;
17534 unsigned VShiftOpc = 0;
17535
17536 switch (IntNo) {
17537 case Intrinsic::arm_neon_vshifts:
17538 case Intrinsic::arm_neon_vshiftu:
17539 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17540 VShiftOpc = ARMISD::VSHLIMM;
17541 break;
17542 }
17543 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17544 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17545 : ARMISD::VSHRuIMM);
17546 break;
17547 }
17548 return SDValue();
17549
17550 case Intrinsic::arm_neon_vrshifts:
17551 case Intrinsic::arm_neon_vrshiftu:
17552 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17553 break;
17554 return SDValue();
17555
17556 case Intrinsic::arm_neon_vqshifts:
17557 case Intrinsic::arm_neon_vqshiftu:
17558 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17559 break;
17560 return SDValue();
17561
17562 case Intrinsic::arm_neon_vqshiftsu:
17563 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17564 break;
17565 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17566
17567 case Intrinsic::arm_neon_vrshiftn:
17568 case Intrinsic::arm_neon_vqshiftns:
17569 case Intrinsic::arm_neon_vqshiftnu:
17570 case Intrinsic::arm_neon_vqshiftnsu:
17571 case Intrinsic::arm_neon_vqrshiftns:
17572 case Intrinsic::arm_neon_vqrshiftnu:
17573 case Intrinsic::arm_neon_vqrshiftnsu:
17574 // Narrowing shifts require an immediate right shift.
17575 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17576 break;
17577 llvm_unreachable("invalid shift count for narrowing vector shift "
17578 "intrinsic");
17579
17580 default:
17581 llvm_unreachable("unhandled vector shift");
17582 }
17583
17584 switch (IntNo) {
17585 case Intrinsic::arm_neon_vshifts:
17586 case Intrinsic::arm_neon_vshiftu:
17587 // Opcode already set above.
17588 break;
17589 case Intrinsic::arm_neon_vrshifts:
17590 VShiftOpc = ARMISD::VRSHRsIMM;
17591 break;
17592 case Intrinsic::arm_neon_vrshiftu:
17593 VShiftOpc = ARMISD::VRSHRuIMM;
17594 break;
17595 case Intrinsic::arm_neon_vrshiftn:
17596 VShiftOpc = ARMISD::VRSHRNIMM;
17597 break;
17598 case Intrinsic::arm_neon_vqshifts:
17599 VShiftOpc = ARMISD::VQSHLsIMM;
17600 break;
17601 case Intrinsic::arm_neon_vqshiftu:
17602 VShiftOpc = ARMISD::VQSHLuIMM;
17603 break;
17604 case Intrinsic::arm_neon_vqshiftsu:
17605 VShiftOpc = ARMISD::VQSHLsuIMM;
17606 break;
17607 case Intrinsic::arm_neon_vqshiftns:
17608 VShiftOpc = ARMISD::VQSHRNsIMM;
17609 break;
17610 case Intrinsic::arm_neon_vqshiftnu:
17611 VShiftOpc = ARMISD::VQSHRNuIMM;
17612 break;
17613 case Intrinsic::arm_neon_vqshiftnsu:
17614 VShiftOpc = ARMISD::VQSHRNsuIMM;
17615 break;
17616 case Intrinsic::arm_neon_vqrshiftns:
17617 VShiftOpc = ARMISD::VQRSHRNsIMM;
17618 break;
17619 case Intrinsic::arm_neon_vqrshiftnu:
17620 VShiftOpc = ARMISD::VQRSHRNuIMM;
17621 break;
17622 case Intrinsic::arm_neon_vqrshiftnsu:
17623 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17624 break;
17625 }
17626
17627 SDLoc dl(N);
17628 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17629 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17630 }
17631
17632 case Intrinsic::arm_neon_vshiftins: {
17633 EVT VT = N->getOperand(1).getValueType();
17634 int64_t Cnt;
17635 unsigned VShiftOpc = 0;
17636
17637 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17638 VShiftOpc = ARMISD::VSLIIMM;
17639 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17640 VShiftOpc = ARMISD::VSRIIMM;
17641 else {
17642 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17643 }
17644
17645 SDLoc dl(N);
17646 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17647 N->getOperand(1), N->getOperand(2),
17648 DAG.getConstant(Cnt, dl, MVT::i32));
17649 }
17650
17651 case Intrinsic::arm_neon_vqrshifts:
17652 case Intrinsic::arm_neon_vqrshiftu:
17653 // No immediate versions of these to check for.
17654 break;
17655
17656 case Intrinsic::arm_mve_vqdmlah:
17657 case Intrinsic::arm_mve_vqdmlash:
17658 case Intrinsic::arm_mve_vqrdmlah:
17659 case Intrinsic::arm_mve_vqrdmlash:
17660 case Intrinsic::arm_mve_vmla_n_predicated:
17661 case Intrinsic::arm_mve_vmlas_n_predicated:
17662 case Intrinsic::arm_mve_vqdmlah_predicated:
17663 case Intrinsic::arm_mve_vqdmlash_predicated:
17664 case Intrinsic::arm_mve_vqrdmlah_predicated:
17665 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17666 // These intrinsics all take an i32 scalar operand which is narrowed to the
17667 // size of a single lane of the vector type they return. So we don't need
17668 // any bits of that operand above that point, which allows us to eliminate
17669 // uxth/sxth.
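 // e.g. for a v8i16 result only the low 16 bits of the scalar matter, so a
 // feeding sxth/uxth of that operand can be removed by SimplifyDemandedBits.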
17670 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17671 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17672 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17673 return SDValue();
17674 break;
17675 }
17676
17677 case Intrinsic::arm_mve_minv:
17678 case Intrinsic::arm_mve_maxv:
17679 case Intrinsic::arm_mve_minav:
17680 case Intrinsic::arm_mve_maxav:
17681 case Intrinsic::arm_mve_minv_predicated:
17682 case Intrinsic::arm_mve_maxv_predicated:
17683 case Intrinsic::arm_mve_minav_predicated:
17684 case Intrinsic::arm_mve_maxav_predicated: {
17685 // These intrinsics all take an i32 scalar operand which is narrowed to the
17686 // size of a single lane of the vector type they take as the other input.
17687 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17688 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17689 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17690 return SDValue();
17691 break;
17692 }
17693
17694 case Intrinsic::arm_mve_addv: {
17695 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17696 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17697 bool Unsigned = N->getConstantOperandVal(2);
17698 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17699 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17700 }
17701
17702 case Intrinsic::arm_mve_addlv:
17703 case Intrinsic::arm_mve_addlv_predicated: {
17704 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17705 // which recombines the two outputs into an i64
17706 bool Unsigned = N->getConstantOperandVal(2);
17707 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17708 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17709 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17710
17711 SmallVector<SDValue, 4> Ops;
17712 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17713 if (i != 2) // skip the unsigned flag
17714 Ops.push_back(N->getOperand(i));
17715
17716 SDLoc dl(N);
17717 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17718 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17719 val.getValue(1));
17720 }
17721 }
17722
17723 return SDValue();
17724}
17725
17726/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17727/// lowers them. As with the vector shift intrinsics, this is done during DAG
17728/// combining instead of DAG legalizing because the build_vectors for 64-bit
17729/// vector element shift counts are generally not legal, and it is hard to see
17730/// their values after they get legalized to loads from a constant pool.
17731 static SDValue PerformShiftCombine(SDNode *N,
17732 TargetLowering::DAGCombinerInfo &DCI,
17733 const ARMSubtarget *ST) {
17734 SelectionDAG &DAG = DCI.DAG;
17735 EVT VT = N->getValueType(0);
17736
17737 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17738 N->getOperand(0)->getOpcode() == ISD::AND &&
17739 N->getOperand(0)->hasOneUse()) {
17740 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17741 return SDValue();
17742 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17743 // usually show up because instcombine prefers to canonicalize it to
17744 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17745 // out of GEP lowering in some cases.
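 // A worked example (hypothetical values): (shl (and x, 0x3ffff), 2) has
 // MaskedBits == 14 and ShiftAmt == 2, and is rewritten below to
 // (srl (shl x, 14), 12), which needs no materialized mask constant.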
17746 SDValue N0 = N->getOperand(0);
17747 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17748 if (!ShiftAmtNode)
17749 return SDValue();
17750 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17751 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17752 if (!AndMaskNode)
17753 return SDValue();
17754 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17755 // Don't transform uxtb/uxth.
17756 if (AndMask == 255 || AndMask == 65535)
17757 return SDValue();
17758 if (isMask_32(AndMask)) {
17759 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17760 if (MaskedBits > ShiftAmt) {
17761 SDLoc DL(N);
17762 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17763 DAG.getConstant(MaskedBits, DL, MVT::i32));
17764 return DAG.getNode(
17765 ISD::SRL, DL, MVT::i32, SHL,
17766 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17767 }
17768 }
17769 }
17770
17771 // Nothing to be done for scalar shifts.
17772 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17773 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17774 return SDValue();
17775 if (ST->hasMVEIntegerOps())
17776 return SDValue();
17777
17778 int64_t Cnt;
17779
17780 switch (N->getOpcode()) {
17781 default: llvm_unreachable("unexpected shift opcode");
17782
17783 case ISD::SHL:
17784 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17785 SDLoc dl(N);
17786 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17787 DAG.getConstant(Cnt, dl, MVT::i32));
17788 }
17789 break;
17790
17791 case ISD::SRA:
17792 case ISD::SRL:
17793 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17794 unsigned VShiftOpc =
17795 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17796 SDLoc dl(N);
17797 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17798 DAG.getConstant(Cnt, dl, MVT::i32));
17799 }
17800 }
17801 return SDValue();
17802}
17803
17804// Look for a sign/zero/fpextend extend of a larger than legal load. This can be
17805// split into multiple extending loads, which are simpler to deal with than an
17806// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17807// to convert the type to an f32.
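// Illustrative example (editorial, not part of the upstream source): a
// (v16i32 sext (v16i8 load)) is split below into four v4i8->v4i32 sextloads at
// byte offsets 0, 4, 8 and 12; f16->f32 extends become integer extending loads
// followed by VCVTL nodes.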
17808static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17809 SDValue N0 = N->getOperand(0);
17810 if (N0.getOpcode() != ISD::LOAD)
17811 return SDValue();
17812 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17813 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17814 LD->getExtensionType() != ISD::NON_EXTLOAD)
17815 return SDValue();
17816 EVT FromVT = LD->getValueType(0);
17817 EVT ToVT = N->getValueType(0);
17818 if (!ToVT.isVector())
17819 return SDValue();
17820 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
17821 EVT ToEltVT = ToVT.getVectorElementType();
17822 EVT FromEltVT = FromVT.getVectorElementType();
17823
17824 unsigned NumElements = 0;
17825 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17826 NumElements = 4;
17827 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17828 NumElements = 4;
17829 if (NumElements == 0 ||
17830 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17831 FromVT.getVectorNumElements() % NumElements != 0 ||
17832 !isPowerOf2_32(NumElements))
17833 return SDValue();
17834
17835 LLVMContext &C = *DAG.getContext();
17836 SDLoc DL(LD);
17837 // Details about the old load
17838 SDValue Ch = LD->getChain();
17839 SDValue BasePtr = LD->getBasePtr();
17840 Align Alignment = LD->getOriginalAlign();
17841 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17842 AAMDNodes AAInfo = LD->getAAInfo();
17843
17844 ISD::LoadExtType NewExtType =
17845 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17846 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17847 EVT NewFromVT = EVT::getVectorVT(
17848 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17849 EVT NewToVT = EVT::getVectorVT(
17850 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17851
17852 SmallVector<SDValue, 4> Loads;
17853 SmallVector<SDValue, 4> Chains;
17854 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17855 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17856 SDValue NewPtr =
17857 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17858
17859 SDValue NewLoad =
17860 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17861 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17862 Alignment, MMOFlags, AAInfo);
17863 Loads.push_back(NewLoad);
17864 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17865 }
17866
17867 // Float truncs need to be extended with VCVTB's into their floating point types.
17868 if (FromEltVT == MVT::f16) {
17869 SmallVector<SDValue, 4> Extends;
17870
17871 for (unsigned i = 0; i < Loads.size(); i++) {
17872 SDValue LoadBC =
17873 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17874 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17875 DAG.getConstant(0, DL, MVT::i32));
17876 Extends.push_back(FPExt);
17877 }
17878
17879 Loads = Extends;
17880 }
17881
17882 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17883 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17884 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17885}
17886
17887/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17888/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17889static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17890 const ARMSubtarget *ST) {
17891 SDValue N0 = N->getOperand(0);
17892
17893 // Check for sign- and zero-extensions of vector extract operations of 8- and
17894 // 16-bit vector elements. NEON and MVE support these directly. They are
17895 // handled during DAG combining because type legalization will promote them
17896 // to 32-bit types and it is messy to recognize the operations after that.
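// Illustrative example (editorial, not part of the upstream source):
// (i32 sext (extract_vector_elt v8i16 x, lane)) is matched below as
// (ARMISD::VGETLANEs x, lane); zext and anyext map to ARMISD::VGETLANEu.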
17897 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17898 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17899 SDValue Vec = N0.getOperand(0);
17900 SDValue Lane = N0.getOperand(1);
17901 EVT VT = N->getValueType(0);
17902 EVT EltVT = N0.getValueType();
17903 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17904
17905 if (VT == MVT::i32 &&
17906 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17907 TLI.isTypeLegal(Vec.getValueType()) &&
17908 isa<ConstantSDNode>(Lane)) {
17909
17910 unsigned Opc = 0;
17911 switch (N->getOpcode()) {
17912 default: llvm_unreachable("unexpected opcode");
17913 case ISD::SIGN_EXTEND:
17914 Opc = ARMISD::VGETLANEs;
17915 break;
17916 case ISD::ZERO_EXTEND:
17917 case ISD::ANY_EXTEND:
17918 Opc = ARMISD::VGETLANEu;
17919 break;
17920 }
17921 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17922 }
17923 }
17924
17925 if (ST->hasMVEIntegerOps())
17926 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17927 return NewLoad;
17928
17929 return SDValue();
17930}
17931
17932static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17933 const ARMSubtarget *ST) {
17934 if (ST->hasMVEFloatOps())
17935 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17936 return NewLoad;
17937
17938 return SDValue();
17939}
17940
17941// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17942// constant bounds.
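// Illustrative examples (editorial, not part of the upstream source):
// smin(smax(x, -128), 127) maps to an ARMISD::SSAT node and
// smin(smax(x, 0), 255) to ARMISD::USAT; the saturation width operand is
// derived from MinC.countr_one() below.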
17943static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17944 const ARMSubtarget *Subtarget) {
17945 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17946 !Subtarget->isThumb2())
17947 return SDValue();
17948
17949 EVT VT = Op.getValueType();
17950 SDValue Op0 = Op.getOperand(0);
17951
17952 if (VT != MVT::i32 ||
17953 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17954 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17955 !isa<ConstantSDNode>(Op0.getOperand(1)))
17956 return SDValue();
17957
17958 SDValue Min = Op;
17959 SDValue Max = Op0;
17960 SDValue Input = Op0.getOperand(0);
17961 if (Min.getOpcode() == ISD::SMAX)
17962 std::swap(Min, Max);
17963
17964 APInt MinC = Min.getConstantOperandAPInt(1);
17965 APInt MaxC = Max.getConstantOperandAPInt(1);
17966
17967 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17968 !(MinC + 1).isPowerOf2())
17969 return SDValue();
17970
17971 SDLoc DL(Op);
17972 if (MinC == ~MaxC)
17973 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17974 DAG.getConstant(MinC.countr_one(), DL, VT));
17975 if (MaxC == 0)
17976 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17977 DAG.getConstant(MinC.countr_one(), DL, VT));
17978
17979 return SDValue();
17980}
17981
17982/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17983/// saturates.
17984static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17985 const ARMSubtarget *ST) {
17986 EVT VT = N->getValueType(0);
17987 SDValue N0 = N->getOperand(0);
17988
17989 if (VT == MVT::i32)
17990 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17991
17992 if (!ST->hasMVEIntegerOps())
17993 return SDValue();
17994
17995 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17996 return V;
17997
17998 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17999 return SDValue();
18000
18001 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
18002 // Check one is a smin and the other is a smax
18003 if (Min->getOpcode() != ISD::SMIN)
18004 std::swap(Min, Max);
18005 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
18006 return false;
18007
18008 APInt SaturateC;
18009 if (VT == MVT::v4i32)
18010 SaturateC = APInt(32, (1 << 15) - 1, true);
18011 else //if (VT == MVT::v8i16)
18012 SaturateC = APInt(16, (1 << 7) - 1, true);
18013
18014 APInt MinC, MaxC;
18015 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18016 MinC != SaturateC)
18017 return false;
18018 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18019 MaxC != ~SaturateC)
18020 return false;
18021 return true;
18022 };
18023
18024 if (IsSignedSaturate(N, N0.getNode())) {
18025 SDLoc DL(N);
18026 MVT ExtVT, HalfVT;
18027 if (VT == MVT::v4i32) {
18028 HalfVT = MVT::v8i16;
18029 ExtVT = MVT::v4i16;
18030 } else { // if (VT == MVT::v8i16)
18031 HalfVT = MVT::v16i8;
18032 ExtVT = MVT::v8i8;
18033 }
18034
18035 // Create a VQMOVNB with undef top lanes, then sign-extend it into the top
18036 // half. That extend will hopefully be removed if only the bottom bits are
18037 // demanded (through a truncating store, for example).
18038 SDValue VQMOVN =
18039 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18040 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18041 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18042 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18043 DAG.getValueType(ExtVT));
18044 }
18045
18046 auto IsUnsignedSaturate = [&](SDNode *Min) {
18047 // For unsigned, we just need to check for <= 0xffff
18048 if (Min->getOpcode() != ISD::UMIN)
18049 return false;
18050
18051 APInt SaturateC;
18052 if (VT == MVT::v4i32)
18053 SaturateC = APInt(32, (1 << 16) - 1, true);
18054 else //if (VT == MVT::v8i16)
18055 SaturateC = APInt(16, (1 << 8) - 1, true);
18056
18057 APInt MinC;
18058 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18059 MinC != SaturateC)
18060 return false;
18061 return true;
18062 };
18063
18064 if (IsUnsignedSaturate(N)) {
18065 SDLoc DL(N);
18066 MVT HalfVT;
18067 unsigned ExtConst;
18068 if (VT == MVT::v4i32) {
18069 HalfVT = MVT::v8i16;
18070 ExtConst = 0x0000FFFF;
18071 } else { //if (VT == MVT::v8i16)
18072 HalfVT = MVT::v16i8;
18073 ExtConst = 0x00FF;
18074 }
18075
18076 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18077 // an AND. That extend will hopefully be removed if only the bottom bits are
18078 // demanded (through a truncating store, for example).
18079 SDValue VQMOVN =
18080 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18081 DAG.getConstant(0, DL, MVT::i32));
18082 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18083 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18084 DAG.getConstant(ExtConst, DL, VT));
18085 }
18086
18087 return SDValue();
18088}
18089
18090static const APInt *isPowerOf2Constant(SDValue V) {
18091 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18092 if (!C)
18093 return nullptr;
18094 const APInt *CV = &C->getAPIntValue();
18095 return CV->isPowerOf2() ? CV : nullptr;
18096}
18097
18098SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18099 // If we have a CMOV, OR and AND combination such as:
18100 // if (x & CN)
18101 // y |= CM;
18102 //
18103 // And:
18104 // * CN is a single bit;
18105 // * All bits covered by CM are known zero in y
18106 //
18107 // Then we can convert this into a sequence of BFI instructions. This will
18108 // always be a win if CM is a single bit, will always be no worse than the
18109 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18110 // three bits (due to the extra IT instruction).
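// Illustrative example (editorial, not part of the upstream source): with
// CN = 0x4 and CM = 0x2, "if (x & 0x4) y |= 0x2;" becomes a right shift of x
// by 2 followed by a single BFI inserting the tested bit into bit 1 of y,
// provided bit 1 of y is already known to be zero.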
18111
18112 SDValue Op0 = CMOV->getOperand(0);
18113 SDValue Op1 = CMOV->getOperand(1);
18114 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18115 SDValue CmpZ = CMOV->getOperand(4);
18116
18117 // The compare must be against zero.
18118 if (!isNullConstant(CmpZ->getOperand(1)))
18119 return SDValue();
18120
18121 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18122 SDValue And = CmpZ->getOperand(0);
18123 if (And->getOpcode() != ISD::AND)
18124 return SDValue();
18125 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18126 if (!AndC)
18127 return SDValue();
18128 SDValue X = And->getOperand(0);
18129
18130 if (CC == ARMCC::EQ) {
18131 // We're performing an "equal to zero" compare. Swap the operands so we
18132 // canonicalize on a "not equal to zero" compare.
18133 std::swap(Op0, Op1);
18134 } else {
18135 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18136 }
18137
18138 if (Op1->getOpcode() != ISD::OR)
18139 return SDValue();
18140
18141 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18142 if (!OrC)
18143 return SDValue();
18144 SDValue Y = Op1->getOperand(0);
18145
18146 if (Op0 != Y)
18147 return SDValue();
18148
18149 // Now, is it profitable to continue?
18150 APInt OrCI = OrC->getAPIntValue();
18151 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18152 if (OrCI.popcount() > Heuristic)
18153 return SDValue();
18154
18155 // Lastly, can we determine that the bits defined by OrCI
18156 // are zero in Y?
18157 KnownBits Known = DAG.computeKnownBits(Y);
18158 if ((OrCI & Known.Zero) != OrCI)
18159 return SDValue();
18160
18161 // OK, we can do the combine.
18162 SDValue V = Y;
18163 SDLoc dl(X);
18164 EVT VT = X.getValueType();
18165 unsigned BitInX = AndC->logBase2();
18166
18167 if (BitInX != 0) {
18168 // We must shift X first.
18169 X = DAG.getNode(ISD::SRL, dl, VT, X,
18170 DAG.getConstant(BitInX, dl, VT));
18171 }
18172
18173 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18174 BitInY < NumActiveBits; ++BitInY) {
18175 if (OrCI[BitInY] == 0)
18176 continue;
18177 APInt Mask(VT.getSizeInBits(), 0);
18178 Mask.setBit(BitInY);
18179 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18180 // Confusingly, the operand is an *inverted* mask.
18181 DAG.getConstant(~Mask, dl, VT));
18182 }
18183
18184 return V;
18185}
18186
18187// Given N, the value controlling the conditional branch, search for the loop
18188// intrinsic, returning it, along with how the value is used. We need to handle
18189// patterns such as the following:
18190// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18191// (brcond (setcc (loop.decrement), 0, eq), exit)
18192// (brcond (setcc (loop.decrement), 0, ne), header)
18193static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18194 bool &Negate) {
18195 switch (N->getOpcode()) {
18196 default:
18197 break;
18198 case ISD::XOR: {
18199 if (!isa<ConstantSDNode>(N.getOperand(1)))
18200 return SDValue();
18201 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18202 return SDValue();
18203 Negate = !Negate;
18204 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18205 }
18206 case ISD::SETCC: {
18207 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18208 if (!Const)
18209 return SDValue();
18210 if (Const->isZero())
18211 Imm = 0;
18212 else if (Const->isOne())
18213 Imm = 1;
18214 else
18215 return SDValue();
18216 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18217 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18218 }
18219 case ISD::INTRINSIC_W_CHAIN: {
18220 unsigned IntOp = N.getConstantOperandVal(1);
18221 if (IntOp != Intrinsic::test_start_loop_iterations &&
18222 IntOp != Intrinsic::loop_decrement_reg)
18223 return SDValue();
18224 return N;
18225 }
18226 }
18227 return SDValue();
18228}
18229
18230static SDValue PerformHWLoopCombine(SDNode *N,
18231 TargetLowering::DAGCombinerInfo &DCI,
18232 const ARMSubtarget *ST) {
18233
18234 // The hwloop intrinsics that we're interested in are used for control-flow,
18235 // either for entering or exiting the loop:
18236 // - test.start.loop.iterations will test whether its operand is zero. If it
18237 // is zero, the proceeding branch should not enter the loop.
18238 // - loop.decrement.reg also tests whether its operand is zero. If it is
18239 // zero, the proceeding branch should not branch back to the beginning of
18240 // the loop.
18241 // So here, we need to check how the brcond is using the result of each
18242 // of the intrinsics to ensure that we're branching to the right place at the
18243 // right time.
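// Illustrative example (editorial, not part of the upstream source):
// (brcond (setcc (test.start.loop.iterations n), 0, eq), exit) is rewritten
// below as an ARMISD::WLSSETUP feeding an ARMISD::WLS that branches to "exit"
// when the trip count is zero, while loop.decrement.reg becomes an
// ARMISD::LOOP_DEC feeding an ARMISD::LE back-edge.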
18244
18245 ISD::CondCode CC = ISD::SETEQ;
18246 SDValue Cond;
18247 int Imm = 1;
18248 bool Negate = false;
18249 SDValue Chain = N->getOperand(0);
18250 SDValue Dest;
18251
18252 if (N->getOpcode() == ISD::BRCOND) {
18253 CC = ISD::SETEQ;
18254 Cond = N->getOperand(1);
18255 Dest = N->getOperand(2);
18256 } else {
18257 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18258 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18259 Cond = N->getOperand(2);
18260 Dest = N->getOperand(4);
18261 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18262 if (!Const->isOne() && !Const->isZero())
18263 return SDValue();
18264 Imm = Const->getZExtValue();
18265 } else
18266 return SDValue();
18267 }
18268
18269 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18270 if (!Int)
18271 return SDValue();
18272
18273 if (Negate)
18274 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18275
18276 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18277 return (CC == ISD::SETEQ && Imm == 0) ||
18278 (CC == ISD::SETNE && Imm == 1) ||
18279 (CC == ISD::SETLT && Imm == 1) ||
18280 (CC == ISD::SETULT && Imm == 1);
18281 };
18282
18283 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18284 return (CC == ISD::SETEQ && Imm == 1) ||
18285 (CC == ISD::SETNE && Imm == 0) ||
18286 (CC == ISD::SETGT && Imm == 0) ||
18287 (CC == ISD::SETUGT && Imm == 0) ||
18288 (CC == ISD::SETGE && Imm == 1) ||
18289 (CC == ISD::SETUGE && Imm == 1);
18290 };
18291
18292 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18293 "unsupported condition");
18294
18295 SDLoc dl(Int);
18296 SelectionDAG &DAG = DCI.DAG;
18297 SDValue Elements = Int.getOperand(2);
18298 unsigned IntOp = Int->getConstantOperandVal(1);
18299 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18300 && "expected single br user");
18301 SDNode *Br = *N->use_begin();
18302 SDValue OtherTarget = Br->getOperand(1);
18303
18304 // Update the unconditional branch to branch to the given Dest.
18305 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18306 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18307 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18308 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18309 };
18310
18311 if (IntOp == Intrinsic::test_start_loop_iterations) {
18312 SDValue Res;
18313 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18314 // We expect this 'instruction' to branch when the counter is zero.
18315 if (IsTrueIfZero(CC, Imm)) {
18316 SDValue Ops[] = {Chain, Setup, Dest};
18317 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18318 } else {
18319 // The logic is the reverse of what we need for WLS, so find the other
18320 // basic block target: the target of the proceeding br.
18321 UpdateUncondBr(Br, Dest, DAG);
18322
18323 SDValue Ops[] = {Chain, Setup, OtherTarget};
18324 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18325 }
18326 // Update LR count to the new value
18327 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18328 // Update chain
18329 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18330 return Res;
18331 } else {
18332 SDValue Size =
18333 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18334 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18335 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18336 DAG.getVTList(MVT::i32, MVT::Other), Args);
18337 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18338
18339 // We expect this instruction to branch when the count is not zero.
18340 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18341
18342 // Update the unconditional branch to target the loop preheader if we've
18343 // found the condition has been reversed.
18344 if (Target == OtherTarget)
18345 UpdateUncondBr(Br, Dest, DAG);
18346
18347 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18348 SDValue(LoopDec.getNode(), 1), Chain);
18349
18350 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18351 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18352 }
18353 return SDValue();
18354}
18355
18356/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18357SDValue
18358ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18359 SDValue Cmp = N->getOperand(4);
18360 if (Cmp.getOpcode() != ARMISD::CMPZ)
18361 // Only looking at NE cases.
18362 return SDValue();
18363
18364 EVT VT = N->getValueType(0);
18365 SDLoc dl(N);
18366 SDValue LHS = Cmp.getOperand(0);
18367 SDValue RHS = Cmp.getOperand(1);
18368 SDValue Chain = N->getOperand(0);
18369 SDValue BB = N->getOperand(1);
18370 SDValue ARMcc = N->getOperand(2);
18371 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18372
18373 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18374 // -> (brcond Chain BB CC CPSR Cmp)
18375 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18376 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18377 LHS->getOperand(0)->hasOneUse() &&
18378 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18379 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18380 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18381 return DAG.getNode(
18382 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18383 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18384 }
18385
18386 return SDValue();
18387}
18388
18389/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18390SDValue
18391ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18392 SDValue Cmp = N->getOperand(4);
18393 if (Cmp.getOpcode() != ARMISD::CMPZ)
18394 // Only looking at EQ and NE cases.
18395 return SDValue();
18396
18397 EVT VT = N->getValueType(0);
18398 SDLoc dl(N);
18399 SDValue LHS = Cmp.getOperand(0);
18400 SDValue RHS = Cmp.getOperand(1);
18401 SDValue FalseVal = N->getOperand(0);
18402 SDValue TrueVal = N->getOperand(1);
18403 SDValue ARMcc = N->getOperand(2);
18404 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18405
18406 // BFI is only available on V6T2+.
18407 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18408 SDValue R = PerformCMOVToBFICombine(N, DAG);
18409 if (R)
18410 return R;
18411 }
18412
18413 // Simplify
18414 // mov r1, r0
18415 // cmp r1, x
18416 // mov r0, y
18417 // moveq r0, x
18418 // to
18419 // cmp r0, x
18420 // movne r0, y
18421 //
18422 // mov r1, r0
18423 // cmp r1, x
18424 // mov r0, x
18425 // movne r0, y
18426 // to
18427 // cmp r0, x
18428 // movne r0, y
18429 /// FIXME: Turn this into a target neutral optimization?
18430 SDValue Res;
18431 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18432 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18433 N->getOperand(3), Cmp);
18434 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18435 SDValue ARMcc;
18436 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18437 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18438 N->getOperand(3), NewCmp);
18439 }
18440
18441 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18442 // -> (cmov F T CC CPSR Cmp)
18443 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18444 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18445 isNullConstant(RHS)) {
18446 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18447 LHS->getOperand(2), LHS->getOperand(3),
18448 LHS->getOperand(4));
18449 }
18450
18451 if (!VT.isInteger())
18452 return SDValue();
18453
18454 // Fold away an unnecessary CMPZ/CMOV
18455 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18456 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18457 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18458 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18459 N->getConstantOperandVal(2) == ARMCC::NE) {
18460 ARMCC::CondCodes Cond;
18461 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18462 if (N->getConstantOperandVal(2) == ARMCC::NE)
18463 Cond = ARMCC::getOppositeCondition(Cond);
18464 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18465 N->getOperand(1),
18466 DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18467 N->getOperand(3), C);
18468 }
18469 }
18470
18471 // Materialize a boolean comparison for integers so we can avoid branching.
18472 if (isNullConstant(FalseVal)) {
18473 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18474 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18475 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18476 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18477 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
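// Illustrative check (editorial, not part of the upstream source): if x == y
// the SUB is 0, CTLZ yields 32 and 32 >> 5 == 1; any non-zero difference has
// CTLZ <= 31, so the shift yields 0.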
18478 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18479 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18480 DAG.getConstant(5, dl, MVT::i32));
18481 } else {
18482 // CMOV 0, 1, ==, (CMPZ x, y) ->
18483 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18484 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18485 //
18486 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18487 // x != y. In other words, a carry C == 1 when x == y, C == 0
18488 // otherwise.
18489 // The final UADDO_CARRY computes
18490 // x - y + (0 - (x - y)) + C == C
18491 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18492 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18493 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18494 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18495 // actually.
18496 SDValue Carry =
18497 DAG.getNode(ISD::SUB, dl, MVT::i32,
18498 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18499 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18500 }
18501 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18502 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18503 // This seems pointless but will allow us to combine it further below.
18504 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18505 SDValue Sub =
18506 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18507 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18508 Sub.getValue(1), SDValue());
18509 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18510 N->getOperand(3), CPSRGlue.getValue(1));
18511 FalseVal = Sub;
18512 }
18513 } else if (isNullConstant(TrueVal)) {
18514 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18515 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18516 // This seems pointless but will allow us to combine it further below
18517 // Note that we change == for != as this is the dual for the case above.
18518 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18519 SDValue Sub =
18520 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18521 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18522 Sub.getValue(1), SDValue());
18523 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18524 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18525 N->getOperand(3), CPSRGlue.getValue(1));
18526 FalseVal = Sub;
18527 }
18528 }
18529
18530 // On Thumb1, the DAG above may be further combined if z is a power of 2
18531 // (z == 2 ^ K).
18532 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18533 // t1 = (USUBO (SUB x, y), 1)
18534 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18535 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18536 //
18537 // This also handles the special case of comparing against zero; it's
18538 // essentially, the same pattern, except there's no SUBC:
18539 // CMOV x, z, !=, (CMPZ x, 0) ->
18540 // t1 = (USUBO x, 1)
18541 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18542 // Result = if K != 0 then (SHL t2:0, K) else t2:0
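// Illustrative check (editorial, not part of the upstream source): for
// x - y == 5 and z == 4 (K == 2), t1 is 4 with no borrow, t2 is 5 - 4 - 0 == 1
// and the final shift gives 4; for x == y, t1 borrows and t2 collapses to 0.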
18543 const APInt *TrueConst;
18544 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18545 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18546 FalseVal.getOperand(1) == RHS) ||
18547 (FalseVal == LHS && isNullConstant(RHS))) &&
18548 (TrueConst = isPowerOf2Constant(TrueVal))) {
18549 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18550 unsigned ShiftAmount = TrueConst->logBase2();
18551 if (ShiftAmount)
18552 TrueVal = DAG.getConstant(1, dl, VT);
18553 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18554 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18555 Subc.getValue(1));
18556
18557 if (ShiftAmount)
18558 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18559 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18560 }
18561
18562 if (Res.getNode()) {
18563 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18564 // Capture demanded bits information that would be otherwise lost.
18565 if (Known.Zero == 0xfffffffe)
18566 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18567 DAG.getValueType(MVT::i1));
18568 else if (Known.Zero == 0xffffff00)
18569 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18570 DAG.getValueType(MVT::i8));
18571 else if (Known.Zero == 0xffff0000)
18572 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18573 DAG.getValueType(MVT::i16));
18574 }
18575
18576 return Res;
18577}
18578
18579static SDValue PerformBITCASTCombine(SDNode *N,
18580 TargetLowering::DAGCombinerInfo &DCI,
18581 const ARMSubtarget *ST) {
18582 SelectionDAG &DAG = DCI.DAG;
18583 SDValue Src = N->getOperand(0);
18584 EVT DstVT = N->getValueType(0);
18585
18586 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18587 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18588 EVT SrcVT = Src.getValueType();
18589 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18590 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18591 }
18592
18593 // We may have a bitcast of something that has already had this bitcast
18594 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18595 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18596 Src = Src.getOperand(0);
18597
18598 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18599 // would be generated is at least the width of the element type.
18600 EVT SrcVT = Src.getValueType();
18601 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18602 Src.getOpcode() == ARMISD::VMVNIMM ||
18603 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18604 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18605 DAG.getDataLayout().isBigEndian())
18606 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18607
18608 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18609 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18610 return R;
18611
18612 return SDValue();
18613}
18614
18615// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18616// node into stack operations after legalizeOps.
18617SDValue ARMTargetLowering::PerformMVETruncCombine(
18618 SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18619 SelectionDAG &DAG = DCI.DAG;
18620 EVT VT = N->getValueType(0);
18621 SDLoc DL(N);
18622
18623 // MVETrunc(Undef, Undef) -> Undef
18624 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18625 return DAG.getUNDEF(VT);
18626
18627 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18628 if (N->getNumOperands() == 2 &&
18629 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18630 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18631 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18632 N->getOperand(0).getOperand(1),
18633 N->getOperand(1).getOperand(0),
18634 N->getOperand(1).getOperand(1));
18635
18636 // MVETrunc(shuffle, shuffle) -> VMOVN
18637 if (N->getNumOperands() == 2 &&
18638 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18639 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18640 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18641 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18642
18643 if (S0->getOperand(0) == S1->getOperand(0) &&
18644 S0->getOperand(1) == S1->getOperand(1)) {
18645 // Construct complete shuffle mask
18646 SmallVector<int, 8> Mask(S0->getMask());
18647 Mask.append(S1->getMask().begin(), S1->getMask().end());
18648
18649 if (isVMOVNTruncMask(Mask, VT, false))
18650 return DAG.getNode(
18651 ARMISD::VMOVN, DL, VT,
18652 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18653 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18654 DAG.getConstant(1, DL, MVT::i32));
18655 if (isVMOVNTruncMask(Mask, VT, true))
18656 return DAG.getNode(
18657 ARMISD::VMOVN, DL, VT,
18658 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18659 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18660 DAG.getConstant(1, DL, MVT::i32));
18661 }
18662 }
18663
18664 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18665 // truncate to a buildvector to allow the generic optimisations to kick in.
18666 if (all_of(N->ops(), [](SDValue Op) {
18667 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18668 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18669 (Op.getOpcode() == ISD::BITCAST &&
18670 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18671 })) {
18672 SmallVector<SDValue, 8> Extracts;
18673 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18674 SDValue O = N->getOperand(Op);
18675 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18676 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18677 DAG.getConstant(i, DL, MVT::i32));
18678 Extracts.push_back(Ext);
18679 }
18680 }
18681 return DAG.getBuildVector(VT, DL, Extracts);
18682 }
18683
18684 // If we are late in the legalization process and nothing has optimised
18685 // the trunc to anything better, lower it to a stack store and reload,
18686 // performing the truncation whilst keeping the lanes in the correct order:
18687 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18688 if (!DCI.isAfterLegalizeDAG())
18689 return SDValue();
18690
18691 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18692 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18693 int NumIns = N->getNumOperands();
18694 assert((NumIns == 2 || NumIns == 4) &&
18695 "Expected 2 or 4 inputs to an MVETrunc");
18696 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18697 if (N->getNumOperands() == 4)
18698 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18699
18700 SmallVector<SDValue> Chains;
18701 for (int I = 0; I < NumIns; I++) {
18702 SDValue Ptr = DAG.getNode(
18703 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18704 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18705 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18706 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18707 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18708 Ptr, MPI, StoreVT, Align(4));
18709 Chains.push_back(Ch);
18710 }
18711
18712 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18713 MachinePointerInfo MPI =
18714 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18715 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18716}
18717
18718// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18719static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18720 SelectionDAG &DAG) {
18721 SDValue N0 = N->getOperand(0);
18722 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18723 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18724 return SDValue();
18725
18726 EVT FromVT = LD->getMemoryVT();
18727 EVT ToVT = N->getValueType(0);
18728 if (!ToVT.isVector())
18729 return SDValue();
18730 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18731 EVT ToEltVT = ToVT.getVectorElementType();
18732 EVT FromEltVT = FromVT.getVectorElementType();
18733
18734 unsigned NumElements = 0;
18735 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18736 NumElements = 4;
18737 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18738 NumElements = 8;
18739 assert(NumElements != 0);
18740
18741 ISD::LoadExtType NewExtType =
18742 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18743 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18744 LD->getExtensionType() != ISD::EXTLOAD &&
18745 LD->getExtensionType() != NewExtType)
18746 return SDValue();
18747
18748 LLVMContext &C = *DAG.getContext();
18749 SDLoc DL(LD);
18750 // Details about the old load
18751 SDValue Ch = LD->getChain();
18752 SDValue BasePtr = LD->getBasePtr();
18753 Align Alignment = LD->getOriginalAlign();
18754 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18755 AAMDNodes AAInfo = LD->getAAInfo();
18756
18757 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18758 EVT NewFromVT = EVT::getVectorVT(
18759 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18760 EVT NewToVT = EVT::getVectorVT(
18761 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18762
18763 SmallVector<SDValue, 4> Loads;
18764 SmallVector<SDValue, 4> Chains;
18765 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18766 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18767 SDValue NewPtr =
18768 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18769
18770 SDValue NewLoad =
18771 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18772 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18773 Alignment, MMOFlags, AAInfo);
18774 Loads.push_back(NewLoad);
18775 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18776 }
18777
18778 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18779 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18780 return DAG.getMergeValues(Loads, DL);
18781}
18782
18783 // Perform combines for MVEEXT. If it has not been optimized to anything better
18784// before lowering, it gets converted to stack store and extloads performing the
18785// extend whilst still keeping the same lane ordering.
18786SDValue ARMTargetLowering::PerformMVEExtCombine(
18787 SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18788 SelectionDAG &DAG = DCI.DAG;
18789 EVT VT = N->getValueType(0);
18790 SDLoc DL(N);
18791 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18792 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18793
18794 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18795 *DAG.getContext());
18796 auto Extend = [&](SDValue V) {
18797 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18798 return N->getOpcode() == ARMISD::MVESEXT
18799 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18800 DAG.getValueType(ExtVT))
18801 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18802 };
18803
18804 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18805 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18806 SDValue Ext = Extend(N->getOperand(0));
18807 return DAG.getMergeValues({Ext, Ext}, DL);
18808 }
18809
18810 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18811 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18812 ArrayRef<int> Mask = SVN->getMask();
18813 assert(Mask.size() == 2 * VT.getVectorNumElements());
18814 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18815 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18816 SDValue Op0 = SVN->getOperand(0);
18817 SDValue Op1 = SVN->getOperand(1);
18818
18819 auto CheckInregMask = [&](int Start, int Offset) {
18820 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18821 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18822 return false;
18823 return true;
18824 };
18825 SDValue V0 = SDValue(N, 0);
18826 SDValue V1 = SDValue(N, 1);
18827 if (CheckInregMask(0, 0))
18828 V0 = Extend(Op0);
18829 else if (CheckInregMask(0, 1))
18830 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18831 else if (CheckInregMask(0, Mask.size()))
18832 V0 = Extend(Op1);
18833 else if (CheckInregMask(0, Mask.size() + 1))
18834 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18835
18836 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18837 V1 = Extend(Op1);
18838 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18839 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18840 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18841 V1 = Extend(Op0);
18842 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18843 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18844
18845 if (V0.getNode() != N || V1.getNode() != N)
18846 return DAG.getMergeValues({V0, V1}, DL);
18847 }
18848
18849 // MVEEXT(load) -> extload, extload
18850 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18851 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18852 return L;
18853
18854 if (!DCI.isAfterLegalizeDAG())
18855 return SDValue();
18856
18857 // Lower to a stack store and reload:
18858 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18859 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18860 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18861 int NumOuts = N->getNumValues();
18862 assert((NumOuts == 2 || NumOuts == 4) &&
18863 "Expected 2 or 4 outputs to an MVEEXT");
18864 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18865 *DAG.getContext());
18866 if (N->getNumOperands() == 4)
18867 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18868
18869 MachinePointerInfo MPI =
18870 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18871 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18872 StackPtr, MPI, Align(4));
18873
18874 SmallVector<SDValue> Loads;
18875 for (int I = 0; I < NumOuts; I++) {
18876 SDValue Ptr = DAG.getNode(
18877 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18878 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18879 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18880 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18881 SDValue Load = DAG.getExtLoad(
18882 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18883 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18884 Loads.push_back(Load);
18885 }
18886
18887 return DAG.getMergeValues(Loads, DL);
18888}
18889
18890SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18891 DAGCombinerInfo &DCI) const {
18892 switch (N->getOpcode()) {
18893 default: break;
18894 case ISD::SELECT_CC:
18895 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18896 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18897 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18898 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18899 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18900 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18901 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18902 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18903 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18904 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18905 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18906 case ISD::BRCOND:
18907 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18908 case ARMISD::ADDC:
18909 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18910 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18911 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18912 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18913 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18914 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18915 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18916 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18917 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18918 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18919 case ISD::EXTRACT_VECTOR_ELT:
18920 return PerformExtractEltCombine(N, DCI, Subtarget);
18921 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
18922 case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
18923 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
18924 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18925 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18926 case ISD::FP_TO_SINT:
18927 case ISD::FP_TO_UINT:
18928 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18929 case ISD::FADD:
18930 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18931 case ISD::FMUL:
18932 return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);
18933 case ISD::INTRINSIC_WO_CHAIN:
18934 return PerformIntrinsicCombine(N, DCI);
18935 case ISD::SHL:
18936 case ISD::SRA:
18937 case ISD::SRL:
18938 return PerformShiftCombine(N, DCI, Subtarget);
18939 case ISD::SIGN_EXTEND:
18940 case ISD::ZERO_EXTEND:
18941 case ISD::ANY_EXTEND:
18942 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18943 case ISD::FP_EXTEND:
18944 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18945 case ISD::SMIN:
18946 case ISD::UMIN:
18947 case ISD::SMAX:
18948 case ISD::UMAX:
18949 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18950 case ARMISD::CMOV:
18951 return PerformCMOVCombine(N, DCI.DAG);
18952 case ARMISD::BRCOND:
18953 return PerformBRCONDCombine(N, DCI.DAG);
18954 case ARMISD::CMPZ:
18955 return PerformCMPZCombine(N, DCI.DAG);
18956 case ARMISD::CSINC:
18957 case ARMISD::CSINV:
18958 case ARMISD::CSNEG:
18959 return PerformCSETCombine(N, DCI.DAG);
18960 case ISD::LOAD:
18961 return PerformLOADCombine(N, DCI, Subtarget);
18962 case ARMISD::VLD1DUP:
18963 case ARMISD::VLD2DUP:
18964 case ARMISD::VLD3DUP:
18965 case ARMISD::VLD4DUP:
18966 return PerformVLDCombine(N, DCI);
18967 case ARMISD::BUILD_VECTOR:
18968 return PerformARMBUILD_VECTORCombine(N, DCI);
18969 case ISD::BITCAST:
18970 return PerformBITCASTCombine(N, DCI, Subtarget);
18971 case ARMISD::PREDICATE_CAST:
18972 return PerformPREDICATE_CASTCombine(N, DCI);
18973 case ARMISD::VECTOR_REG_CAST:
18974 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18975 case ARMISD::MVETRUNC:
18976 return PerformMVETruncCombine(N, DCI);
18977 case ARMISD::MVESEXT:
18978 case ARMISD::MVEZEXT:
18979 return PerformMVEExtCombine(N, DCI);
18980 case ARMISD::VCMP:
18981 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18982 case ISD::VECREDUCE_ADD:
18983 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18984 case ARMISD::VADDVs:
18985 case ARMISD::VADDVu:
18986 case ARMISD::VADDLVs:
18987 case ARMISD::VADDLVu:
18988 case ARMISD::VADDLVAs:
18989 case ARMISD::VADDLVAu:
18990 case ARMISD::VMLAVs:
18991 case ARMISD::VMLAVu:
18992 case ARMISD::VMLALVs:
18993 case ARMISD::VMLALVu:
18994 case ARMISD::VMLALVAs:
18995 case ARMISD::VMLALVAu:
18996 return PerformReduceShuffleCombine(N, DCI.DAG);
18997 case ARMISD::VMOVN:
18998 return PerformVMOVNCombine(N, DCI);
18999 case ARMISD::VQMOVNs:
19000 case ARMISD::VQMOVNu:
19001 return PerformVQMOVNCombine(N, DCI);
19002 case ARMISD::VQDMULH:
19003 return PerformVQDMULHCombine(N, DCI);
19004 case ARMISD::ASRL:
19005 case ARMISD::LSRL:
19006 case ARMISD::LSLL:
19007 return PerformLongShiftCombine(N, DCI.DAG);
19008 case ARMISD::SMULWB: {
19009 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19010 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19011 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19012 return SDValue();
19013 break;
19014 }
19015 case ARMISD::SMULWT: {
19016 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19017 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19018 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19019 return SDValue();
19020 break;
19021 }
19022 case ARMISD::SMLALBB:
19023 case ARMISD::QADD16b:
19024 case ARMISD::QSUB16b:
19025 case ARMISD::UQADD16b:
19026 case ARMISD::UQSUB16b: {
19027 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19028 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19029 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19030 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19031 return SDValue();
19032 break;
19033 }
19034 case ARMISD::SMLALBT: {
19035 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19036 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19037 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19038 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19039 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19040 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19041 return SDValue();
19042 break;
19043 }
19044 case ARMISD::SMLALTB: {
19045 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19046 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19047 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19048 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19049 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19050 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19051 return SDValue();
19052 break;
19053 }
19054 case ARMISD::SMLALTT: {
19055 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19056 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19057 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19058 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19059 return SDValue();
19060 break;
19061 }
19062 case ARMISD::QADD8b:
19063 case ARMISD::QSUB8b:
19064 case ARMISD::UQADD8b:
19065 case ARMISD::UQSUB8b: {
19066 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19067 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19068 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19069 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19070 return SDValue();
19071 break;
19072 }
19073 case ISD::INTRINSIC_VOID:
19074 case ISD::INTRINSIC_W_CHAIN:
19075 switch (N->getConstantOperandVal(1)) {
19076 case Intrinsic::arm_neon_vld1:
19077 case Intrinsic::arm_neon_vld1x2:
19078 case Intrinsic::arm_neon_vld1x3:
19079 case Intrinsic::arm_neon_vld1x4:
19080 case Intrinsic::arm_neon_vld2:
19081 case Intrinsic::arm_neon_vld3:
19082 case Intrinsic::arm_neon_vld4:
19083 case Intrinsic::arm_neon_vld2lane:
19084 case Intrinsic::arm_neon_vld3lane:
19085 case Intrinsic::arm_neon_vld4lane:
19086 case Intrinsic::arm_neon_vld2dup:
19087 case Intrinsic::arm_neon_vld3dup:
19088 case Intrinsic::arm_neon_vld4dup:
19089 case Intrinsic::arm_neon_vst1:
19090 case Intrinsic::arm_neon_vst1x2:
19091 case Intrinsic::arm_neon_vst1x3:
19092 case Intrinsic::arm_neon_vst1x4:
19093 case Intrinsic::arm_neon_vst2:
19094 case Intrinsic::arm_neon_vst3:
19095 case Intrinsic::arm_neon_vst4:
19096 case Intrinsic::arm_neon_vst2lane:
19097 case Intrinsic::arm_neon_vst3lane:
19098 case Intrinsic::arm_neon_vst4lane:
19099 return PerformVLDCombine(N, DCI);
19100 case Intrinsic::arm_mve_vld2q:
19101 case Intrinsic::arm_mve_vld4q:
19102 case Intrinsic::arm_mve_vst2q:
19103 case Intrinsic::arm_mve_vst4q:
19104 return PerformMVEVLDCombine(N, DCI);
19105 default: break;
19106 }
19107 break;
19108 }
19109 return SDValue();
19110}
19111
19112bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19113 EVT VT) const {
19114 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19115}
19116
19117bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19118 Align Alignment,
19119 MachineMemOperand::Flags,
19120 unsigned *Fast) const {
19121 // Depends what it gets converted into if the type is weird.
19122 if (!VT.isSimple())
19123 return false;
19124
19125 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19126 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19127 auto Ty = VT.getSimpleVT().SimpleTy;
19128
19129 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19130 // Unaligned access can use (for example) LDRB, LDRH, LDR
19131 if (AllowsUnaligned) {
19132 if (Fast)
19133 *Fast = Subtarget->hasV7Ops();
19134 return true;
19135 }
19136 }
19137
19138 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19139 // For any little-endian targets with neon, we can support unaligned ld/st
19140 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19141 // A big-endian target may also explicitly support unaligned accesses
19142 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19143 if (Fast)
19144 *Fast = 1;
19145 return true;
19146 }
19147 }
19148
19149 if (!Subtarget->hasMVEIntegerOps())
19150 return false;
19151
19152 // These are for predicates
19153 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19154 Ty == MVT::v2i1)) {
19155 if (Fast)
19156 *Fast = 1;
19157 return true;
19158 }
19159
19160 // These are for truncated stores/narrowing loads. They are fine so long as
19161 // the alignment is at least the size of the item being loaded
19162 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19163 Alignment >= VT.getScalarSizeInBits() / 8) {
19164 if (Fast)
19165 *Fast = true;
19166 return true;
19167 }
19168
19169 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19170 // VSTRW.U32 all store the vector register in exactly the same format, and
19171 // differ only in the range of their immediate offset field and the required
19172 // alignment. So there is always a store that can be used, regardless of
19173 // actual type.
19174 //
19175 // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
19176 // VREV64.8) pair and get the same effect. This will likely be better than
19177 // aligning the vector through the stack.
19178 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19179 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19180 Ty == MVT::v2f64) {
19181 if (Fast)
19182 *Fast = 1;
19183 return true;
19184 }
19185
19186 return false;
19187}
19188
19189
19190EVT ARMTargetLowering::getOptimalMemOpType(
19191 const MemOp &Op, const AttributeList &FuncAttributes) const {
19192 // See if we can use NEON instructions for this...
19193 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19194 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19195 unsigned Fast;
19196 if (Op.size() >= 16 &&
19197 (Op.isAligned(Align(16)) ||
19198 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19199 MachineMemOperand::MONone, &Fast) &&
19200 Fast))) {
19201 return MVT::v2f64;
19202 } else if (Op.size() >= 8 &&
19203 (Op.isAligned(Align(8)) ||
19204 (allowsMisalignedMemoryAccesses(
19205 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19206 Fast))) {
19207 return MVT::f64;
19208 }
19209 }
19210
19211 // Let the target-independent logic figure it out.
19212 return MVT::Other;
19213}
19214
19215// 64-bit integers are split into their high and low parts and held in two
19216// different registers, so the trunc is free since the low register can just
19217// be used.
19218bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19219 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19220 return false;
19221 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19222 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19223 return (SrcBits == 64 && DestBits == 32);
19224}
19225
19226bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19227 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19228 !DstVT.isInteger())
19229 return false;
19230 unsigned SrcBits = SrcVT.getSizeInBits();
19231 unsigned DestBits = DstVT.getSizeInBits();
19232 return (SrcBits == 64 && DestBits == 32);
19233}
19234
19235bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19236 if (Val.getOpcode() != ISD::LOAD)
19237 return false;
19238
19239 EVT VT1 = Val.getValueType();
19240 if (!VT1.isSimple() || !VT1.isInteger() ||
19241 !VT2.isSimple() || !VT2.isInteger())
19242 return false;
19243
19244 switch (VT1.getSimpleVT().SimpleTy) {
19245 default: break;
19246 case MVT::i1:
19247 case MVT::i8:
19248 case MVT::i16:
19249 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19250 return true;
19251 }
19252
19253 return false;
19254}
19255
19256bool ARMTargetLowering::isFNegFree(EVT VT) const {
19257 if (!VT.isSimple())
19258 return false;
19259
19260 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19261 // negate values directly (fneg is free). So, we don't want to let the DAG
19262 // combiner rewrite fneg into xors and some other instructions. For f16 and
19263 // FullFP16 argument passing, some bitcast nodes may be introduced,
19264 // triggering this DAG combine rewrite, so we are avoiding that with this.
19265 switch (VT.getSimpleVT().SimpleTy) {
19266 default: break;
19267 case MVT::f16:
19268 return Subtarget->hasFullFP16();
19269 }
19270
19271 return false;
19272}
19273
19274/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
19275/// of the vector elements.
19276static bool areExtractExts(Value *Ext1, Value *Ext2) {
19277 auto areExtDoubled = [](Instruction *Ext) {
19278 return Ext->getType()->getScalarSizeInBits() ==
19279 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19280 };
19281
19282 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
19283 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
19284 !areExtDoubled(cast<Instruction>(Ext1)) ||
19285 !areExtDoubled(cast<Instruction>(Ext2)))
19286 return false;
19287
19288 return true;
19289}
19290
19291/// Check if sinking \p I's operands to I's basic block is profitable, because
19292/// the operands can be folded into a target instruction, e.g.
19293/// sext/zext can be folded into vsubl.
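/// Illustrative example (editorial, not part of the upstream source): for a
/// NEON (sub (sext v8i8 a), (sext v8i8 b)), sinking both extends next to the
/// sub lets instruction selection form a single vsubl.s8.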
19294bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
19295 SmallVectorImpl<Use *> &Ops) const {
19296 if (!I->getType()->isVectorTy())
19297 return false;
19298
19299 if (Subtarget->hasNEON()) {
19300 switch (I->getOpcode()) {
19301 case Instruction::Sub:
19302 case Instruction::Add: {
19303 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19304 return false;
19305 Ops.push_back(&I->getOperandUse(0));
19306 Ops.push_back(&I->getOperandUse(1));
19307 return true;
19308 }
19309 default:
19310 return false;
19311 }
19312 }
19313
19314 if (!Subtarget->hasMVEIntegerOps())
19315 return false;
19316
19317 auto IsFMSMul = [&](Instruction *I) {
19318 if (!I->hasOneUse())
19319 return false;
19320 auto *Sub = cast<Instruction>(*I->users().begin());
19321 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19322 };
19323 auto IsFMS = [&](Instruction *I) {
19324 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19325 match(I->getOperand(1), m_FNeg(m_Value())))
19326 return true;
19327 return false;
19328 };
19329
19330 auto IsSinker = [&](Instruction *I, int Operand) {
19331 switch (I->getOpcode()) {
19332 case Instruction::Add:
19333 case Instruction::Mul:
19334 case Instruction::FAdd:
19335 case Instruction::ICmp:
19336 case Instruction::FCmp:
19337 return true;
19338 case Instruction::FMul:
19339 return !IsFMSMul(I);
19340 case Instruction::Sub:
19341 case Instruction::FSub:
19342 case Instruction::Shl:
19343 case Instruction::LShr:
19344 case Instruction::AShr:
19345 return Operand == 1;
19346 case Instruction::Call:
19347 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19348 switch (II->getIntrinsicID()) {
19349 case Intrinsic::fma:
19350 return !IsFMS(I);
19351 case Intrinsic::sadd_sat:
19352 case Intrinsic::uadd_sat:
19353 case Intrinsic::arm_mve_add_predicated:
19354 case Intrinsic::arm_mve_mul_predicated:
19355 case Intrinsic::arm_mve_qadd_predicated:
19356 case Intrinsic::arm_mve_vhadd:
19357 case Intrinsic::arm_mve_hadd_predicated:
19358 case Intrinsic::arm_mve_vqdmull:
19359 case Intrinsic::arm_mve_vqdmull_predicated:
19360 case Intrinsic::arm_mve_vqdmulh:
19361 case Intrinsic::arm_mve_qdmulh_predicated:
19362 case Intrinsic::arm_mve_vqrdmulh:
19363 case Intrinsic::arm_mve_qrdmulh_predicated:
19364 case Intrinsic::arm_mve_fma_predicated:
19365 return true;
19366 case Intrinsic::ssub_sat:
19367 case Intrinsic::usub_sat:
19368 case Intrinsic::arm_mve_sub_predicated:
19369 case Intrinsic::arm_mve_qsub_predicated:
19370 case Intrinsic::arm_mve_hsub_predicated:
19371 case Intrinsic::arm_mve_vhsub:
19372 return Operand == 1;
19373 default:
19374 return false;
19375 }
19376 }
19377 return false;
19378 default:
19379 return false;
19380 }
19381 };
19382
19383 for (auto OpIdx : enumerate(I->operands())) {
19384 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19385 // Make sure we are not already sinking this operand
19386 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19387 continue;
19388
19389 Instruction *Shuffle = Op;
19390 if (Shuffle->getOpcode() == Instruction::BitCast)
19391 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19392 // We are looking for a splat that can be sunk.
19393 if (!Shuffle ||
19394 !match(Shuffle, m_Shuffle(
19395 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
19396 m_Undef(), m_ZeroMask())))
19397 continue;
19398 if (!IsSinker(I, OpIdx.index()))
19399 continue;
19400
19401 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
19402 // and vector registers
19403 for (Use &U : Op->uses()) {
19404 Instruction *Insn = cast<Instruction>(U.getUser());
19405 if (!IsSinker(Insn, U.getOperandNo()))
19406 return false;
19407 }
19408
19409 Ops.push_back(&Shuffle->getOperandUse(0));
19410 if (Shuffle != Op)
19411 Ops.push_back(&Op->getOperandUse(0));
19412 Ops.push_back(&OpIdx.value());
19413 }
19414 return true;
19415}
19416
19417Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19418 if (!Subtarget->hasMVEIntegerOps())
19419 return nullptr;
19420 Type *SVIType = SVI->getType();
19421 Type *ScalarType = SVIType->getScalarType();
19422
19423 if (ScalarType->isFloatTy())
19424 return Type::getInt32Ty(SVIType->getContext());
19425 if (ScalarType->isHalfTy())
19426 return Type::getInt16Ty(SVIType->getContext());
19427 return nullptr;
19428}
19429
19430bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19431 EVT VT = ExtVal.getValueType();
19432
19433 if (!isTypeLegal(VT))
19434 return false;
19435
19436 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19437 if (Ld->isExpandingLoad())
19438 return false;
19439 }
19440
19441 if (Subtarget->hasMVEIntegerOps())
19442 return true;
19443
19444 // Don't create a loadext if we can fold the extension into a wide/long
19445 // instruction.
19446 // If there's more than one user instruction, the loadext is desirable no
19447 // matter what. There can be two uses by the same instruction.
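  // For example, (add (zext (load x)), y) can be selected as vaddw/vaddl if
  // the extend stays visible, so forming an extending load here would only
  // hide that opportunity.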
19448 if (ExtVal->use_empty() ||
19449 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19450 return true;
19451
19452 SDNode *U = *ExtVal->use_begin();
19453 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19454 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19455 return false;
19456
19457 return true;
19458}
19459
19460bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19461 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19462 return false;
19463
19464 if (!isTypeLegal(EVT::getEVT(Ty1)))
19465 return false;
19466
19467 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19468
19469 // Assuming the caller doesn't have a zeroext or signext return parameter,
19470 // truncation all the way down to i1 is valid.
19471 return true;
19472}
19473
19474/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19475/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19476/// expanded to FMAs when this method returns true, otherwise fmuladd is
19477/// expanded to fmul + fadd.
19478///
19479/// ARM supports both fused and unfused multiply-add operations; we already
19480/// lower a pair of fmul and fadd to the latter so it's not clear that there
19481/// would be a gain or that the gain would be worthwhile enough to risk
19482/// correctness bugs.
19483///
19484/// For MVE, we set this to true as it helps simplify the need for some
19485/// patterns (and we don't have the non-fused floating point instruction).
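/// Roughly, this means @llvm.fmuladd becomes a fused VFMA for MVE vectors and
/// for scalar types where the VFPv4 FMA instructions are usable, and stays as
/// a separate multiply and add otherwise.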
19486bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19487 EVT VT) const {
19488 if (!VT.isSimple())
19489 return false;
19490
19491 switch (VT.getSimpleVT().SimpleTy) {
19492 case MVT::v4f32:
19493 case MVT::v8f16:
19494 return Subtarget->hasMVEFloatOps();
19495 case MVT::f16:
19496 return Subtarget->useFPVFMx16();
19497 case MVT::f32:
19498 return Subtarget->useFPVFMx();
19499 case MVT::f64:
19500 return Subtarget->useFPVFMx64();
19501 default:
19502 break;
19503 }
19504
19505 return false;
19506}
19507
19508static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19509 if (V < 0)
19510 return false;
19511
19512 unsigned Scale = 1;
19513 switch (VT.getSimpleVT().SimpleTy) {
19514 case MVT::i1:
19515 case MVT::i8:
19516 // Scale == 1;
19517 break;
19518 case MVT::i16:
19519 // Scale == 2;
19520 Scale = 2;
19521 break;
19522 default:
19523 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19524 // Scale == 4;
19525 Scale = 4;
19526 break;
19527 }
19528
19529 if ((V & (Scale - 1)) != 0)
19530 return false;
19531 return isUInt<5>(V / Scale);
19532}
19533
19534static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19535 const ARMSubtarget *Subtarget) {
19536 if (!VT.isInteger() && !VT.isFloatingPoint())
19537 return false;
19538 if (VT.isVector() && Subtarget->hasNEON())
19539 return false;
19540 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19541 !Subtarget->hasMVEFloatOps())
19542 return false;
19543
19544 bool IsNeg = false;
19545 if (V < 0) {
19546 IsNeg = true;
19547 V = -V;
19548 }
19549
19550 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19551
19552 // MVE: size * imm7
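  // For example, a 32-bit element VLDRW/VSTRW accepts offsets that are
  // multiples of 4 in [-508, 508], while VLDRB/VSTRB accepts any offset in
  // [-127, 127].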
19553 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19554 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19555 case MVT::i32:
19556 case MVT::f32:
19557 return isShiftedUInt<7,2>(V);
19558 case MVT::i16:
19559 case MVT::f16:
19560 return isShiftedUInt<7,1>(V);
19561 case MVT::i8:
19562 return isUInt<7>(V);
19563 default:
19564 return false;
19565 }
19566 }
19567
19568 // half VLDR: 2 * imm8
19569 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19570 return isShiftedUInt<8, 1>(V);
19571 // VLDR and LDRD: 4 * imm8
19572 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19573 return isShiftedUInt<8, 2>(V);
19574
19575 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19576 // + imm12 or - imm8
19577 if (IsNeg)
19578 return isUInt<8>(V);
19579 return isUInt<12>(V);
19580 }
19581
19582 return false;
19583}
19584
19585/// isLegalAddressImmediate - Return true if the integer value can be used
19586/// as the offset of the target addressing mode for load / store of the
19587/// given type.
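/// For example, in ARM mode an i32 LDR/STR accepts offsets in [-4095, 4095],
/// while an i16 LDRH is limited to [-255, 255].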
19588static bool isLegalAddressImmediate(int64_t V, EVT VT,
19589 const ARMSubtarget *Subtarget) {
19590 if (V == 0)
19591 return true;
19592
19593 if (!VT.isSimple())
19594 return false;
19595
19596 if (Subtarget->isThumb1Only())
19597 return isLegalT1AddressImmediate(V, VT);
19598 else if (Subtarget->isThumb2())
19599 return isLegalT2AddressImmediate(V, VT, Subtarget);
19600
19601 // ARM mode.
19602 if (V < 0)
19603 V = - V;
19604 switch (VT.getSimpleVT().SimpleTy) {
19605 default: return false;
19606 case MVT::i1:
19607 case MVT::i8:
19608 case MVT::i32:
19609 // +- imm12
19610 return isUInt<12>(V);
19611 case MVT::i16:
19612 // +- imm8
19613 return isUInt<8>(V);
19614 case MVT::f32:
19615 case MVT::f64:
19616 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19617 return false;
19618 return isShiftedUInt<8, 2>(V);
19619 }
19620}
19621
19622bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19623 EVT VT) const {
19624 int Scale = AM.Scale;
19625 if (Scale < 0)
19626 return false;
19627
19628 switch (VT.getSimpleVT().SimpleTy) {
19629 default: return false;
19630 case MVT::i1:
19631 case MVT::i8:
19632 case MVT::i16:
19633 case MVT::i32:
19634 if (Scale == 1)
19635 return true;
19636 // r + r << imm
19637 Scale = Scale & ~1;
19638 return Scale == 2 || Scale == 4 || Scale == 8;
19639 case MVT::i64:
19640 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19641 // version in Thumb mode.
19642 // r + r
19643 if (Scale == 1)
19644 return true;
19645 // r * 2 (this can be lowered to r + r).
19646 if (!AM.HasBaseReg && Scale == 2)
19647 return true;
19648 return false;
19649 case MVT::isVoid:
19650 // Note, we allow "void" uses (basically, uses that aren't loads or
19651 // stores), because arm allows folding a scale into many arithmetic
19652 // operations. This should be made more precise and revisited later.
19653
19654 // Allow r << imm, but the imm has to be a multiple of two.
19655 if (Scale & 1) return false;
19656 return isPowerOf2_32(Scale);
19657 }
19658}
19659
19660bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19661 EVT VT) const {
19662 const int Scale = AM.Scale;
19663
19664 // Negative scales are not supported in Thumb1.
19665 if (Scale < 0)
19666 return false;
19667
19668 // Thumb1 addressing modes do not support register scaling excepting the
19669 // following cases:
19670 // 1. Scale == 1 means no scaling.
19671 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19672 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19673}
19674
19675/// isLegalAddressingMode - Return true if the addressing mode represented
19676/// by AM is legal for this target, for a load/store of the specified type.
19677bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19678 const AddrMode &AM, Type *Ty,
19679 unsigned AS, Instruction *I) const {
19680 EVT VT = getValueType(DL, Ty, true);
19681 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19682 return false;
19683
19684 // Can never fold addr of global into load/store.
19685 if (AM.BaseGV)
19686 return false;
19687
19688 switch (AM.Scale) {
19689 case 0: // no scale reg, must be "r+i" or "r", or "i".
19690 break;
19691 default:
19692 // ARM doesn't support any R+R*scale+imm addr modes.
19693 if (AM.BaseOffs)
19694 return false;
19695
19696 if (!VT.isSimple())
19697 return false;
19698
19699 if (Subtarget->isThumb1Only())
19700 return isLegalT1ScaledAddressingMode(AM, VT);
19701
19702 if (Subtarget->isThumb2())
19703 return isLegalT2ScaledAddressingMode(AM, VT);
19704
19705 int Scale = AM.Scale;
19706 switch (VT.getSimpleVT().SimpleTy) {
19707 default: return false;
19708 case MVT::i1:
19709 case MVT::i8:
19710 case MVT::i32:
19711 if (Scale < 0) Scale = -Scale;
19712 if (Scale == 1)
19713 return true;
19714 // r + r << imm
19715 return isPowerOf2_32(Scale & ~1);
19716 case MVT::i16:
19717 case MVT::i64:
19718 // r +/- r
19719 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19720 return true;
19721 // r * 2 (this can be lowered to r + r).
19722 if (!AM.HasBaseReg && Scale == 2)
19723 return true;
19724 return false;
19725
19726 case MVT::isVoid:
19727 // Note, we allow "void" uses (basically, uses that aren't loads or
19728 // stores), because arm allows folding a scale into many arithmetic
19729 // operations. This should be made more precise and revisited later.
19730
19731 // Allow r << imm, but the imm has to be a multiple of two.
19732 if (Scale & 1) return false;
19733 return isPowerOf2_32(Scale);
19734 }
19735 }
19736 return true;
19737}
19738
19739/// isLegalICmpImmediate - Return true if the specified immediate is legal
19740/// icmp immediate, that is the target has icmp instructions which can compare
19741/// a register against the immediate without having to materialize the
19742/// immediate into a register.
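/// For example, "icmp eq i32 %x, -10" can be selected as "cmn r0, #10" in ARM
/// and Thumb2 modes, so small negative immediates are still legal here.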
19743bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19744 // Thumb2 and ARM modes can use cmn for negative immediates.
19745 if (!Subtarget->isThumb())
19746 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19747 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19748 if (Subtarget->isThumb2())
19749 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19750 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19751 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19752 return Imm >= 0 && Imm <= 255;
19753}
19754
19755/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19756/// *or sub* immediate, that is the target has add or sub instructions which can
19757/// add a register with the immediate without having to materialize the
19758/// immediate into a register.
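/// For example, an add of -255 can be selected as "sub r0, r0, #255", which is
/// why only the magnitude of the immediate matters below.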
19759bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19760 // Same encoding for add/sub, just flip the sign.
19761 int64_t AbsImm = std::abs(Imm);
19762 if (!Subtarget->isThumb())
19763 return ARM_AM::getSOImmVal(AbsImm) != -1;
19764 if (Subtarget->isThumb2())
19765 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19766 // Thumb1 only has 8-bit unsigned immediate.
19767 return AbsImm >= 0 && AbsImm <= 255;
19768}
19769
19770// Return false to prevent folding
19771// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19772// if the folding leads to worse code.
19773bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19774 SDValue ConstNode) const {
19775 // Let the DAGCombiner decide for vector types and large types.
19776 const EVT VT = AddNode.getValueType();
19777 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19778 return true;
19779
19780 // It is worse if c0 is legal add immediate, while c1*c0 is not
19781 // and has to be composed by at least two instructions.
19782 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19783 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19784 const int64_t C0 = C0Node->getSExtValue();
19785 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19786 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19787 return true;
19788 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19789 return false;
19790
19791 // Default to true and let the DAGCombiner decide.
19792 return true;
19793}
19794
19795static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19796 bool isSEXTLoad, SDValue &Base,
19797 SDValue &Offset, bool &isInc,
19798 SelectionDAG &DAG) {
19799 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19800 return false;
19801
19802 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19803 // AddressingMode 3
19804 Base = Ptr->getOperand(0);
19805 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19806 int RHSC = (int)RHS->getZExtValue();
19807 if (RHSC < 0 && RHSC > -256) {
19808 assert(Ptr->getOpcode() == ISD::ADD);
19809 isInc = false;
19810 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19811 return true;
19812 }
19813 }
19814 isInc = (Ptr->getOpcode() == ISD::ADD);
19815 Offset = Ptr->getOperand(1);
19816 return true;
19817 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19818 // AddressingMode 2
19819 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19820 int RHSC = (int)RHS->getZExtValue();
19821 if (RHSC < 0 && RHSC > -0x1000) {
19822 assert(Ptr->getOpcode() == ISD::ADD);
19823 isInc = false;
19824 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19825 Base = Ptr->getOperand(0);
19826 return true;
19827 }
19828 }
19829
19830 if (Ptr->getOpcode() == ISD::ADD) {
19831 isInc = true;
19832 ARM_AM::ShiftOpc ShOpcVal=
19833 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19834 if (ShOpcVal != ARM_AM::no_shift) {
19835 Base = Ptr->getOperand(1);
19836 Offset = Ptr->getOperand(0);
19837 } else {
19838 Base = Ptr->getOperand(0);
19839 Offset = Ptr->getOperand(1);
19840 }
19841 return true;
19842 }
19843
19844 isInc = (Ptr->getOpcode() == ISD::ADD);
19845 Base = Ptr->getOperand(0);
19846 Offset = Ptr->getOperand(1);
19847 return true;
19848 }
19849
19850 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19851 return false;
19852}
19853
19854static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19855 bool isSEXTLoad, SDValue &Base,
19856 SDValue &Offset, bool &isInc,
19857 SelectionDAG &DAG) {
19858 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19859 return false;
19860
19861 Base = Ptr->getOperand(0);
19862 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19863 int RHSC = (int)RHS->getZExtValue();
19864 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19865 assert(Ptr->getOpcode() == ISD::ADD);
19866 isInc = false;
19867 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19868 return true;
19869 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19870 isInc = Ptr->getOpcode() == ISD::ADD;
19871 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19872 return true;
19873 }
19874 }
19875
19876 return false;
19877}
19878
19879static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19880 bool isSEXTLoad, bool IsMasked, bool isLE,
19881 SDValue &Base, SDValue &Offset,
19882 bool &isInc, SelectionDAG &DAG) {
19883 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19884 return false;
19885 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19886 return false;
19887
19888 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19889 // as opposed to a vldrw.32). This can allow extra addressing modes or
19890 // alignments for what is otherwise an equivalent instruction.
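  // For example, a v4i32 access at a byte offset of 2 is not encodable by
  // vldrw.u32 (the offset must be a multiple of 4), but on little-endian an
  // unmasked access can instead use vldrb.u8 with that byte offset.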
19891 bool CanChangeType = isLE && !IsMasked;
19892
19893 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19894 int RHSC = (int)RHS->getZExtValue();
19895
19896 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19897 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19898 assert(Ptr->getOpcode() == ISD::ADD);
19899 isInc = false;
19900 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19901 return true;
19902 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19903 isInc = Ptr->getOpcode() == ISD::ADD;
19904 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19905 return true;
19906 }
19907 return false;
19908 };
19909
19910 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19911 // (in BE/masked) type.
19912 Base = Ptr->getOperand(0);
19913 if (VT == MVT::v4i16) {
19914 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19915 return true;
19916 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19917 if (IsInRange(RHSC, 0x80, 1))
19918 return true;
19919 } else if (Alignment >= 4 &&
19920 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19921 IsInRange(RHSC, 0x80, 4))
19922 return true;
19923 else if (Alignment >= 2 &&
19924 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19925 IsInRange(RHSC, 0x80, 2))
19926 return true;
19927 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19928 return true;
19929 return false;
19930}
19931
19932/// getPreIndexedAddressParts - returns true by value, base pointer and
19933/// offset pointer and addressing mode by reference if the node's address
19934/// can be legally represented as pre-indexed load / store address.
19935bool
19936ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19937 SDValue &Offset,
19938 ISD::MemIndexedMode &AM,
19939 SelectionDAG &DAG) const {
19940 if (Subtarget->isThumb1Only())
19941 return false;
19942
19943 EVT VT;
19944 SDValue Ptr;
19945 Align Alignment;
19946 bool isSEXTLoad = false;
19947 bool IsMasked = false;
19948 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19949 Ptr = LD->getBasePtr();
19950 VT = LD->getMemoryVT();
19951 Alignment = LD->getAlign();
19952 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19953 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19954 Ptr = ST->getBasePtr();
19955 VT = ST->getMemoryVT();
19956 Alignment = ST->getAlign();
19957 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19958 Ptr = LD->getBasePtr();
19959 VT = LD->getMemoryVT();
19960 Alignment = LD->getAlign();
19961 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19962 IsMasked = true;
19963 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19964 Ptr = ST->getBasePtr();
19965 VT = ST->getMemoryVT();
19966 Alignment = ST->getAlign();
19967 IsMasked = true;
19968 } else
19969 return false;
19970
19971 bool isInc;
19972 bool isLegal = false;
19973 if (VT.isVector())
19974 isLegal = Subtarget->hasMVEIntegerOps() &&
19975 getMVEIndexedAddressParts(
19976 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19977 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19978 else {
19979 if (Subtarget->isThumb2())
19980 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19981 Offset, isInc, DAG);
19982 else
19983 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19984 Offset, isInc, DAG);
19985 }
19986 if (!isLegal)
19987 return false;
19988
19989 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19990 return true;
19991}
19992
19993/// getPostIndexedAddressParts - returns true by value, base pointer and
19994/// offset pointer and addressing mode by reference if this node can be
19995/// combined with a load / store to form a post-indexed load / store.
19996bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19997 SDValue &Base,
19998 SDValue &Offset,
19999 ISD::MemIndexedMode &AM,
20000 SelectionDAG &DAG) const {
20001 EVT VT;
20002 SDValue Ptr;
20003 Align Alignment;
20004 bool isSEXTLoad = false, isNonExt;
20005 bool IsMasked = false;
20006 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20007 VT = LD->getMemoryVT();
20008 Ptr = LD->getBasePtr();
20009 Alignment = LD->getAlign();
20010 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20011 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20012 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20013 VT = ST->getMemoryVT();
20014 Ptr = ST->getBasePtr();
20015 Alignment = ST->getAlign();
20016 isNonExt = !ST->isTruncatingStore();
20017 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
20018 VT = LD->getMemoryVT();
20019 Ptr = LD->getBasePtr();
20020 Alignment = LD->getAlign();
20021 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20022 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20023 IsMasked = true;
20024 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
20025 VT = ST->getMemoryVT();
20026 Ptr = ST->getBasePtr();
20027 Alignment = ST->getAlign();
20028 isNonExt = !ST->isTruncatingStore();
20029 IsMasked = true;
20030 } else
20031 return false;
20032
20033 if (Subtarget->isThumb1Only()) {
20034 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
20035 // must be non-extending/truncating, i32, with an offset of 4.
20036 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
20037 if (Op->getOpcode() != ISD::ADD || !isNonExt)
20038 return false;
20039 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20040 if (!RHS || RHS->getZExtValue() != 4)
20041 return false;
20042 if (Alignment < Align(4))
20043 return false;
20044
20045 Offset = Op->getOperand(1);
20046 Base = Op->getOperand(0);
20047 AM = ISD::POST_INC;
20048 return true;
20049 }
20050
20051 bool isInc;
20052 bool isLegal = false;
20053 if (VT.isVector())
20054 isLegal = Subtarget->hasMVEIntegerOps() &&
20055 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
20056 Subtarget->isLittle(), Base, Offset,
20057 isInc, DAG);
20058 else {
20059 if (Subtarget->isThumb2())
20060 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20061 isInc, DAG);
20062 else
20063 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20064 isInc, DAG);
20065 }
20066 if (!isLegal)
20067 return false;
20068
20069 if (Ptr != Base) {
20070 // Swap base ptr and offset to catch more post-index load / store when
20071 // it's legal. In Thumb2 mode, offset must be an immediate.
20072 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
20073 !Subtarget->isThumb2())
20074 std::swap(Base, Offset);
20075
20076 // Post-indexed load / store update the base pointer.
20077 if (Ptr != Base)
20078 return false;
20079 }
20080
20081 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
20082 return true;
20083}
20084
20085void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
20086 KnownBits &Known,
20087 const APInt &DemandedElts,
20088 const SelectionDAG &DAG,
20089 unsigned Depth) const {
20090 unsigned BitWidth = Known.getBitWidth();
20091 Known.resetAll();
20092 switch (Op.getOpcode()) {
20093 default: break;
20094 case ARMISD::ADDC:
20095 case ARMISD::ADDE:
20096 case ARMISD::SUBC:
20097 case ARMISD::SUBE:
20098 // Special cases when we convert a carry to a boolean.
20099 if (Op.getResNo() == 0) {
20100 SDValue LHS = Op.getOperand(0);
20101 SDValue RHS = Op.getOperand(1);
20102 // (ADDE 0, 0, C) will give us a single bit.
20103 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20104 isNullConstant(RHS)) {
20105 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20106 return;
20107 }
20108 }
20109 break;
20110 case ARMISD::CMOV: {
20111 // Bits are known zero/one if known on the LHS and RHS.
20112 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20113 if (Known.isUnknown())
20114 return;
20115
20116 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20117 Known = Known.intersectWith(KnownRHS);
20118 return;
20119 }
20120 case ISD::INTRINSIC_W_CHAIN: {
20121 Intrinsic::ID IntID =
20122 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20123 switch (IntID) {
20124 default: return;
20125 case Intrinsic::arm_ldaex:
20126 case Intrinsic::arm_ldrex: {
20127 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20128 unsigned MemBits = VT.getScalarSizeInBits();
20129 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20130 return;
20131 }
20132 }
20133 }
20134 case ARMISD::BFI: {
20135 // Conservatively, we can recurse down the first operand
20136 // and just mask out all affected bits.
20137 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20138
20139 // The operand to BFI is already a mask suitable for removing the bits it
20140 // sets.
20141 const APInt &Mask = Op.getConstantOperandAPInt(2);
20142 Known.Zero &= Mask;
20143 Known.One &= Mask;
20144 return;
20145 }
20146 case ARMISD::VGETLANEs:
20147 case ARMISD::VGETLANEu: {
20148 const SDValue &SrcSV = Op.getOperand(0);
20149 EVT VecVT = SrcSV.getValueType();
20150 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20151 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20152 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20153 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20154 "VGETLANE index out of bounds");
20155 unsigned Idx = Pos->getZExtValue();
20156 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20157 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20158
20159 EVT VT = Op.getValueType();
20160 const unsigned DstSz = VT.getScalarSizeInBits();
20161 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20162 (void)SrcSz;
20163 assert(SrcSz == Known.getBitWidth());
20164 assert(DstSz > SrcSz);
20165 if (Op.getOpcode() == ARMISD::VGETLANEs)
20166 Known = Known.sext(DstSz);
20167 else {
20168 Known = Known.zext(DstSz);
20169 }
20170 assert(DstSz == Known.getBitWidth());
20171 break;
20172 }
20173 case ARMISD::VMOVrh: {
20174 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20175 assert(KnownOp.getBitWidth() == 16);
20176 Known = KnownOp.zext(32);
20177 break;
20178 }
20179 case ARMISD::CSINC:
20180 case ARMISD::CSINV:
20181 case ARMISD::CSNEG: {
20182 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20183 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20184
20185 // The result is either:
20186 // CSINC: KnownOp0 or KnownOp1 + 1
20187 // CSINV: KnownOp0 or ~KnownOp1
20188 // CSNEG: KnownOp0 or KnownOp1 * -1
20189 if (Op.getOpcode() == ARMISD::CSINC)
20190 KnownOp1 =
20191 KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
20192 else if (Op.getOpcode() == ARMISD::CSINV)
20193 std::swap(KnownOp1.Zero, KnownOp1.One);
20194 else if (Op.getOpcode() == ARMISD::CSNEG)
20195 KnownOp1 = KnownBits::mul(
20196 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20197
20198 Known = KnownOp0.intersectWith(KnownOp1);
20199 break;
20200 }
20201 }
20202}
20203
20204bool ARMTargetLowering::targetShrinkDemandedConstant(
20205 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20206 TargetLoweringOpt &TLO) const {
20207 // Delay optimization, so we don't have to deal with illegal types, or block
20208 // optimizations.
20209 if (!TLO.LegalOps)
20210 return false;
20211
20212 // Only optimize AND for now.
20213 if (Op.getOpcode() != ISD::AND)
20214 return false;
20215
20216 EVT VT = Op.getValueType();
20217
20218 // Ignore vectors.
20219 if (VT.isVector())
20220 return false;
20221
20222 assert(VT == MVT::i32 && "Unexpected integer type");
20223
20224 // Make sure the RHS really is a constant.
20225 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20226 if (!C)
20227 return false;
20228
20229 unsigned Mask = C->getZExtValue();
20230
20231 unsigned Demanded = DemandedBits.getZExtValue();
20232 unsigned ShrunkMask = Mask & Demanded;
20233 unsigned ExpandedMask = Mask | ~Demanded;
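  // Any replacement constant that covers every bit of ShrunkMask and sets no
  // bit outside ExpandedMask behaves identically on the demanded bits, so we
  // are free to pick whichever such constant is cheapest to materialize.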
20234
20235 // If the mask is all zeros, let the target-independent code replace the
20236 // result with zero.
20237 if (ShrunkMask == 0)
20238 return false;
20239
20240 // If the mask is all ones, erase the AND. (Currently, the target-independent
20241 // code won't do this, so we have to do it explicitly to avoid an infinite
20242 // loop in obscure cases.)
20243 if (ExpandedMask == ~0U)
20244 return TLO.CombineTo(Op, Op.getOperand(0));
20245
20246 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20247 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20248 };
20249 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20250 if (NewMask == Mask)
20251 return true;
20252 SDLoc DL(Op);
20253 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20254 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20255 return TLO.CombineTo(Op, NewOp);
20256 };
20257
20258 // Prefer uxtb mask.
20259 if (IsLegalMask(0xFF))
20260 return UseMask(0xFF);
20261
20262 // Prefer uxth mask.
20263 if (IsLegalMask(0xFFFF))
20264 return UseMask(0xFFFF);
20265
20266 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20267 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20268 if (ShrunkMask < 256)
20269 return UseMask(ShrunkMask);
20270
20271 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20272 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20273 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20274 return UseMask(ExpandedMask);
20275
20276 // Potential improvements:
20277 //
20278 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20279 // We could try to prefer Thumb1 immediates which can be lowered to a
20280 // two-instruction sequence.
20281 // We could try to recognize more legal ARM/Thumb2 immediates here.
20282
20283 return false;
20284}
20285
20286bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20287 SDValue Op, const APInt &OriginalDemandedBits,
20288 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20289 unsigned Depth) const {
20290 unsigned Opc = Op.getOpcode();
20291
20292 switch (Opc) {
20293 case ARMISD::ASRL:
20294 case ARMISD::LSRL: {
20295 // If this is result 0 and the other result is unused, see if the demand
20296 // bits allow us to shrink this long shift into a standard small shift in
20297 // the opposite direction.
20298 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20299 isa<ConstantSDNode>(Op->getOperand(2))) {
20300 unsigned ShAmt = Op->getConstantOperandVal(2);
20301 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20302 << (32 - ShAmt)))
20303 return TLO.CombineTo(
20304 Op, TLO.DAG.getNode(
20305 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20306 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20307 }
20308 break;
20309 }
20310 case ARMISD::VBICIMM: {
20311 SDValue Op0 = Op.getOperand(0);
20312 unsigned ModImm = Op.getConstantOperandVal(1);
20313 unsigned EltBits = 0;
20314 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20315 if ((OriginalDemandedBits & Mask) == 0)
20316 return TLO.CombineTo(Op, Op0);
20317 }
20318 }
20319
20320 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20321 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20322}
20323
20324//===----------------------------------------------------------------------===//
20325// ARM Inline Assembly Support
20326//===----------------------------------------------------------------------===//
20327
20328bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20329 // Looking for "rev" which is V6+.
20330 if (!Subtarget->hasV6Ops())
20331 return false;
20332
20333 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20334 StringRef AsmStr = IA->getAsmString();
20335 SmallVector<StringRef, 4> AsmPieces;
20336 SplitString(AsmStr, AsmPieces, ";\n");
20337
20338 switch (AsmPieces.size()) {
20339 default: return false;
20340 case 1:
20341 AsmStr = AsmPieces[0];
20342 AsmPieces.clear();
20343 SplitString(AsmStr, AsmPieces, " \t,");
20344
20345 // rev $0, $1
20346 if (AsmPieces.size() == 3 &&
20347 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20348 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20349 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20350 if (Ty && Ty->getBitWidth() == 32)
20351 return IntrinsicLowering::LowerToByteSwap(CI);
20352 }
20353 break;
20354 }
20355
20356 return false;
20357}
20358
20359const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20360 // At this point, we have to lower this constraint to something else, so we
20361 // lower it to an "r" or "w". However, by doing this we will force the result
20362 // to be in register, while the X constraint is much more permissive.
20363 //
20364 // Although we are correct (we are free to emit anything, without
20365 // constraints), we might break use cases that would expect us to be more
20366 // efficient and emit something else.
20367 if (!Subtarget->hasVFP2Base())
20368 return "r";
20369 if (ConstraintVT.isFloatingPoint())
20370 return "w";
20371 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20372 (ConstraintVT.getSizeInBits() == 64 ||
20373 ConstraintVT.getSizeInBits() == 128))
20374 return "w";
20375
20376 return "r";
20377}
20378
20379/// getConstraintType - Given a constraint letter, return the type of
20380/// constraint it is for this target.
20381ARMTargetLowering::ConstraintType
20382ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20383 unsigned S = Constraint.size();
20384 if (S == 1) {
20385 switch (Constraint[0]) {
20386 default: break;
20387 case 'l': return C_RegisterClass;
20388 case 'w': return C_RegisterClass;
20389 case 'h': return C_RegisterClass;
20390 case 'x': return C_RegisterClass;
20391 case 't': return C_RegisterClass;
20392 case 'j': return C_Immediate; // Constant for movw.
20393 // An address with a single base register. Due to the way we
20394 // currently handle addresses it is the same as an 'r' memory constraint.
20395 case 'Q': return C_Memory;
20396 }
20397 } else if (S == 2) {
20398 switch (Constraint[0]) {
20399 default: break;
20400 case 'T': return C_RegisterClass;
20401 // All 'U+' constraints are addresses.
20402 case 'U': return C_Memory;
20403 }
20404 }
20405 return TargetLowering::getConstraintType(Constraint);
20406}
20407
20408/// Examine constraint type and operand type and determine a weight value.
20409/// This object must already have been set up with the operand type
20410/// and the current alternative constraint selected.
20411TargetLowering::ConstraintWeight
20412ARMTargetLowering::getSingleConstraintMatchWeight(
20413 AsmOperandInfo &info, const char *constraint) const {
20414 ConstraintWeight weight = CW_Invalid;
20415 Value *CallOperandVal = info.CallOperandVal;
20416 // If we don't have a value, we can't do a match,
20417 // but allow it at the lowest weight.
20418 if (!CallOperandVal)
20419 return CW_Default;
20420 Type *type = CallOperandVal->getType();
20421 // Look at the constraint type.
20422 switch (*constraint) {
20423 default:
20424 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20425 break;
20426 case 'l':
20427 if (type->isIntegerTy()) {
20428 if (Subtarget->isThumb())
20429 weight = CW_SpecificReg;
20430 else
20431 weight = CW_Register;
20432 }
20433 break;
20434 case 'w':
20435 if (type->isFloatingPointTy())
20436 weight = CW_Register;
20437 break;
20438 }
20439 return weight;
20440}
20441
20442using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20443
20444RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20445 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20446 switch (Constraint.size()) {
20447 case 1:
20448 // GCC ARM Constraint Letters
20449 switch (Constraint[0]) {
20450 case 'l': // Low regs or general regs.
20451 if (Subtarget->isThumb())
20452 return RCPair(0U, &ARM::tGPRRegClass);
20453 return RCPair(0U, &ARM::GPRRegClass);
20454 case 'h': // High regs or no regs.
20455 if (Subtarget->isThumb())
20456 return RCPair(0U, &ARM::hGPRRegClass);
20457 break;
20458 case 'r':
20459 if (Subtarget->isThumb1Only())
20460 return RCPair(0U, &ARM::tGPRRegClass);
20461 return RCPair(0U, &ARM::GPRRegClass);
20462 case 'w':
20463 if (VT == MVT::Other)
20464 break;
20465 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20466 return RCPair(0U, &ARM::SPRRegClass);
20467 if (VT.getSizeInBits() == 64)
20468 return RCPair(0U, &ARM::DPRRegClass);
20469 if (VT.getSizeInBits() == 128)
20470 return RCPair(0U, &ARM::QPRRegClass);
20471 break;
20472 case 'x':
20473 if (VT == MVT::Other)
20474 break;
20475 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20476 return RCPair(0U, &ARM::SPR_8RegClass);
20477 if (VT.getSizeInBits() == 64)
20478 return RCPair(0U, &ARM::DPR_8RegClass);
20479 if (VT.getSizeInBits() == 128)
20480 return RCPair(0U, &ARM::QPR_8RegClass);
20481 break;
20482 case 't':
20483 if (VT == MVT::Other)
20484 break;
20485 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20486 return RCPair(0U, &ARM::SPRRegClass);
20487 if (VT.getSizeInBits() == 64)
20488 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20489 if (VT.getSizeInBits() == 128)
20490 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20491 break;
20492 }
20493 break;
20494
20495 case 2:
20496 if (Constraint[0] == 'T') {
20497 switch (Constraint[1]) {
20498 default:
20499 break;
20500 case 'e':
20501 return RCPair(0U, &ARM::tGPREvenRegClass);
20502 case 'o':
20503 return RCPair(0U, &ARM::tGPROddRegClass);
20504 }
20505 }
20506 break;
20507
20508 default:
20509 break;
20510 }
20511
20512 if (StringRef("{cc}").equals_insensitive(Constraint))
20513 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20514
20515 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20516}
20517
20518/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20519/// vector. If it is invalid, don't add anything to Ops.
20520void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20521 StringRef Constraint,
20522 std::vector<SDValue> &Ops,
20523 SelectionDAG &DAG) const {
20524 SDValue Result;
20525
20526 // Currently only support length 1 constraints.
20527 if (Constraint.size() != 1)
20528 return;
20529
20530 char ConstraintLetter = Constraint[0];
20531 switch (ConstraintLetter) {
20532 default: break;
20533 case 'j':
20534 case 'I': case 'J': case 'K': case 'L':
20535 case 'M': case 'N': case 'O':
20536 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20537 if (!C)
20538 return;
20539
20540 int64_t CVal64 = C->getSExtValue();
20541 int CVal = (int) CVal64;
20542 // None of these constraints allow values larger than 32 bits. Check
20543 // that the value fits in an int.
20544 if (CVal != CVal64)
20545 return;
20546
20547 switch (ConstraintLetter) {
20548 case 'j':
20549 // Constant suitable for movw, must be between 0 and
20550 // 65535.
20551 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20552 if (CVal >= 0 && CVal <= 65535)
20553 break;
20554 return;
20555 case 'I':
20556 if (Subtarget->isThumb1Only()) {
20557 // This must be a constant between 0 and 255, for ADD
20558 // immediates.
20559 if (CVal >= 0 && CVal <= 255)
20560 break;
20561 } else if (Subtarget->isThumb2()) {
20562 // A constant that can be used as an immediate value in a
20563 // data-processing instruction.
20564 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20565 break;
20566 } else {
20567 // A constant that can be used as an immediate value in a
20568 // data-processing instruction.
20569 if (ARM_AM::getSOImmVal(CVal) != -1)
20570 break;
20571 }
20572 return;
20573
20574 case 'J':
20575 if (Subtarget->isThumb1Only()) {
20576 // This must be a constant between -255 and -1, for negated ADD
20577 // immediates. This can be used in GCC with an "n" modifier that
20578 // prints the negated value, for use with SUB instructions. It is
20579 // not useful otherwise but is implemented for compatibility.
20580 if (CVal >= -255 && CVal <= -1)
20581 break;
20582 } else {
20583 // This must be a constant between -4095 and 4095. It is not clear
20584 // what this constraint is intended for. Implemented for
20585 // compatibility with GCC.
20586 if (CVal >= -4095 && CVal <= 4095)
20587 break;
20588 }
20589 return;
20590
20591 case 'K':
20592 if (Subtarget->isThumb1Only()) {
20593 // A 32-bit value where only one byte has a nonzero value. Exclude
20594 // zero to match GCC. This constraint is used by GCC internally for
20595 // constants that can be loaded with a move/shift combination.
20596 // It is not useful otherwise but is implemented for compatibility.
20597 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20598 break;
20599 } else if (Subtarget->isThumb2()) {
20600 // A constant whose bitwise inverse can be used as an immediate
20601 // value in a data-processing instruction. This can be used in GCC
20602 // with a "B" modifier that prints the inverted value, for use with
20603 // BIC and MVN instructions. It is not useful otherwise but is
20604 // implemented for compatibility.
20605 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20606 break;
20607 } else {
20608 // A constant whose bitwise inverse can be used as an immediate
20609 // value in a data-processing instruction. This can be used in GCC
20610 // with a "B" modifier that prints the inverted value, for use with
20611 // BIC and MVN instructions. It is not useful otherwise but is
20612 // implemented for compatibility.
20613 if (ARM_AM::getSOImmVal(~CVal) != -1)
20614 break;
20615 }
20616 return;
20617
20618 case 'L':
20619 if (Subtarget->isThumb1Only()) {
20620 // This must be a constant between -7 and 7,
20621 // for 3-operand ADD/SUB immediate instructions.
20622 if (CVal >= -7 && CVal < 7)
20623 break;
20624 } else if (Subtarget->isThumb2()) {
20625 // A constant whose negation can be used as an immediate value in a
20626 // data-processing instruction. This can be used in GCC with an "n"
20627 // modifier that prints the negated value, for use with SUB
20628 // instructions. It is not useful otherwise but is implemented for
20629 // compatibility.
20630 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20631 break;
20632 } else {
20633 // A constant whose negation can be used as an immediate value in a
20634 // data-processing instruction. This can be used in GCC with an "n"
20635 // modifier that prints the negated value, for use with SUB
20636 // instructions. It is not useful otherwise but is implemented for
20637 // compatibility.
20638 if (ARM_AM::getSOImmVal(-CVal) != -1)
20639 break;
20640 }
20641 return;
20642
20643 case 'M':
20644 if (Subtarget->isThumb1Only()) {
20645 // This must be a multiple of 4 between 0 and 1020, for
20646 // ADD sp + immediate.
20647 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20648 break;
20649 } else {
20650 // A power of two or a constant between 0 and 32. This is used in
20651 // GCC for the shift amount on shifted register operands, but it is
20652 // useful in general for any shift amounts.
20653 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20654 break;
20655 }
20656 return;
20657
20658 case 'N':
20659 if (Subtarget->isThumb1Only()) {
20660 // This must be a constant between 0 and 31, for shift amounts.
20661 if (CVal >= 0 && CVal <= 31)
20662 break;
20663 }
20664 return;
20665
20666 case 'O':
20667 if (Subtarget->isThumb1Only()) {
20668 // This must be a multiple of 4 between -508 and 508, for
20669 // ADD/SUB sp = sp + immediate.
20670 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20671 break;
20672 }
20673 return;
20674 }
20675 Result = DAG.getSignedConstant(CVal, SDLoc(Op), Op.getValueType(),
20676 /*isTarget=*/true);
20677 break;
20678 }
20679
20680 if (Result.getNode()) {
20681 Ops.push_back(Result);
20682 return;
20683 }
20684 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20685}
20686
20687static RTLIB::Libcall getDivRemLibcall(
20688 const SDNode *N, MVT::SimpleValueType SVT) {
20689 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20690 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20691 "Unhandled Opcode in getDivRemLibcall");
20692 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20693 N->getOpcode() == ISD::SREM;
20694 RTLIB::Libcall LC;
20695 switch (SVT) {
20696 default: llvm_unreachable("Unexpected request for libcall!");
20697 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20698 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20699 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20700 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20701 }
20702 return LC;
20703}
20704
20705static TargetLowering::ArgListTy getDivRemArgList(
20706 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20707 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20708 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20709 "Unhandled Opcode in getDivRemArgList");
20710 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20711 N->getOpcode() == ISD::SREM;
20712 TargetLowering::ArgListTy Args;
20713 TargetLowering::ArgListEntry Entry;
20714 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20715 EVT ArgVT = N->getOperand(i).getValueType();
20716 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20717 Entry.Node = N->getOperand(i);
20718 Entry.Ty = ArgTy;
20719 Entry.IsSExt = isSigned;
20720 Entry.IsZExt = !isSigned;
20721 Args.push_back(Entry);
20722 }
20723 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20724 std::swap(Args[0], Args[1]);
20725 return Args;
20726}
20727
20728SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20729 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20730 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20731 Subtarget->isTargetWindows()) &&
20732 "Register-based DivRem lowering only");
20733 unsigned Opcode = Op->getOpcode();
20734 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20735 "Invalid opcode for Div/Rem lowering");
20736 bool isSigned = (Opcode == ISD::SDIVREM);
20737 EVT VT = Op->getValueType(0);
20738 SDLoc dl(Op);
20739
20740 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20741 SmallVector<SDValue> Result;
20742 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20743 SDValue Res0 =
20744 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20745 SDValue Res1 =
20746 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20747 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20748 {Res0, Res1});
20749 }
20750 }
20751
20752 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20753
20754 // If the target has hardware divide, use divide + multiply + subtract:
20755 // div = a / b
20756 // rem = a - b * div
20757 // return {div, rem}
20758 // This should be lowered into UDIV/SDIV + MLS later on.
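  // For example, with hardware divide this becomes "sdiv r2, r0, r1" followed
  // by "mls r3, r2, r1, r0", leaving the quotient in r2 and the remainder in
  // r3.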
20759 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20760 : Subtarget->hasDivideInARMMode();
20761 if (hasDivide && Op->getValueType(0).isSimple() &&
20762 Op->getSimpleValueType(0) == MVT::i32) {
20763 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20764 const SDValue Dividend = Op->getOperand(0);
20765 const SDValue Divisor = Op->getOperand(1);
20766 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20767 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20768 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20769
20770 SDValue Values[2] = {Div, Rem};
20771 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20772 }
20773
20774 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20775 VT.getSimpleVT().SimpleTy);
20776 SDValue InChain = DAG.getEntryNode();
20777
20778 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20779 DAG.getContext(),
20780 Subtarget);
20781
20782 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20783 getPointerTy(DAG.getDataLayout()));
20784
20785 Type *RetTy = StructType::get(Ty, Ty);
20786
20787 if (Subtarget->isTargetWindows())
20788 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20789
20790 TargetLowering::CallLoweringInfo CLI(DAG);
20791 CLI.setDebugLoc(dl).setChain(InChain)
20792 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20793 .setSExtResult(isSigned).setZExtResult(!isSigned);
20794
20795 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20796 return CallInfo.first;
20797}
20798
20799// Lowers REM using divmod helpers
20800// see RTABI section 4.2/4.3
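// For example, a 32-bit signed rem becomes a call to __aeabi_idivmod, which
// returns the quotient in r0 and the remainder in r1.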
20801SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20802 EVT VT = N->getValueType(0);
20803
20804 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20805 SmallVector<SDValue> Result;
20806 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20807 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20808 Result[0], Result[1]);
20809 }
20810
20811 // Build return types (div and rem)
20812 std::vector<Type*> RetTyParams;
20813 Type *RetTyElement;
20814
20815 switch (VT.getSimpleVT().SimpleTy) {
20816 default: llvm_unreachable("Unexpected request for libcall!");
20817 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20818 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20819 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20820 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20821 }
20822
20823 RetTyParams.push_back(RetTyElement);
20824 RetTyParams.push_back(RetTyElement);
20825 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20826 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20827
20828 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20829 SimpleTy);
20830 SDValue InChain = DAG.getEntryNode();
20831 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20832 Subtarget);
20833 bool isSigned = N->getOpcode() == ISD::SREM;
20834 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20835 getPointerTy(DAG.getDataLayout()));
20836
20837 if (Subtarget->isTargetWindows())
20838 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20839
20840 // Lower call
20841 CallLoweringInfo CLI(DAG);
20842 CLI.setChain(InChain)
20843 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20844 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
20845 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20846
20847 // Return second (rem) result operand (first contains div)
20848 SDNode *ResNode = CallResult.first.getNode();
20849 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20850 return ResNode->getOperand(1);
20851}
20852
20853SDValue
20854ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20855 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20856 SDLoc DL(Op);
20857
20858 // Get the inputs.
20859 SDValue Chain = Op.getOperand(0);
20860 SDValue Size = Op.getOperand(1);
20861
20862 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20863 "no-stack-arg-probe")) {
20864 MaybeAlign Align =
20865 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20866 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20867 Chain = SP.getValue(1);
20868 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20869 if (Align)
20870 SP =
20871 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20872 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20873 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20874 SDValue Ops[2] = { SP, Chain };
20875 return DAG.getMergeValues(Ops, DL);
20876 }
20877
20878 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20879 DAG.getConstant(2, DL, MVT::i32));
20880
20881 SDValue Glue;
20882 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20883 Glue = Chain.getValue(1);
20884
20885 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20886 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20887
20888 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20889 Chain = NewSP.getValue(1);
20890
20891 SDValue Ops[2] = { NewSP, Chain };
20892 return DAG.getMergeValues(Ops, DL);
20893}
20894
20895SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20896 bool IsStrict = Op->isStrictFPOpcode();
20897 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20898 const unsigned DstSz = Op.getValueType().getSizeInBits();
20899 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20900 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20901 "Unexpected type for custom-lowering FP_EXTEND");
20902
20903 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20904 "With both FP DP and 16, any FP conversion is legal!");
20905
20906 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20907 "With FP16, 16 to 32 conversion is legal!");
20908
20909 // Converting from 32 -> 64 is valid if we have FP64.
20910 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20911 // FIXME: Remove this when we have strict fp instruction selection patterns
20912 if (IsStrict) {
20913 SDLoc Loc(Op);
20914 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20915 Loc, Op.getValueType(), SrcVal);
20916 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20917 }
20918 return Op;
20919 }
20920
20921 // Either we are converting from 16 -> 64, without FP16 and/or
20922 // FP.double-precision or without Armv8-fp. So we must do it in two
20923 // steps.
20924 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20925 // without FP16. So we must do a function call.
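  // For example, an f16 -> f64 extend without FP16 is done as a half-to-float
  // conversion (a libcall such as __aeabi_h2f on AEABI targets) followed by a
  // float-to-double step, which is a VCVT if FP64 is available and another
  // libcall otherwise.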
20926 SDLoc Loc(Op);
20927 RTLIB::Libcall LC;
20928 MakeLibCallOptions CallOptions;
20929 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20930 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20931 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20932 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20933 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20934 if (Supported) {
20935 if (IsStrict) {
20936 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20937 {DstVT, MVT::Other}, {Chain, SrcVal});
20938 Chain = SrcVal.getValue(1);
20939 } else {
20940 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20941 }
20942 } else {
20943 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20944 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20945 "Unexpected type for custom-lowering FP_EXTEND");
20946 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20947 Loc, Chain);
20948 }
20949 }
20950
20951 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20952}
20953
20954SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20955 bool IsStrict = Op->isStrictFPOpcode();
20956
20957 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20958 EVT SrcVT = SrcVal.getValueType();
20959 EVT DstVT = Op.getValueType();
20960 const unsigned DstSz = Op.getValueType().getSizeInBits();
20961 const unsigned SrcSz = SrcVT.getSizeInBits();
20962 (void)DstSz;
20963 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20964 "Unexpected type for custom-lowering FP_ROUND");
20965
20966 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20967 "With both FP DP and 16, any FP conversion is legal!");
20968
20969 SDLoc Loc(Op);
20970
20971 // Instruction from 32 -> 16 if hasFP16 is valid
20972 if (SrcSz == 32 && Subtarget->hasFP16())
20973 return Op;
20974
20975 // Lib call from 32 -> 16 / 64 -> [32, 16]
20976 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20977 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20978 "Unexpected type for custom-lowering FP_ROUND");
20979 MakeLibCallOptions CallOptions;
20980 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20981 SDValue Result;
20982 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20983 Loc, Chain);
20984 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20985}
20986
20987bool
20988ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20989 // The ARM target isn't yet aware of offsets.
20990 return false;
20991}
20992
20993bool ARM::isBitFieldInvertedMask(unsigned v) {
20994 if (v == 0xffffffff)
20995 return false;
20996
20997 // There can be 1's on either or both "outsides"; all the "inside"
20998 // bits must be 0's.
20999 return isShiftedMask_32(~v);
21000}
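// Worked examples (illustrative, not from the original file): the check
// isShiftedMask_32(~v) accepts values whose zero bits form one contiguous run
// with ones on the outside(s):
//   v = 0xF00000FF -> ~v = 0x0FFFFF00, one contiguous run of ones -> true
//   v = 0xFFFF00FF -> ~v = 0x0000FF00, one contiguous run of ones -> true
//   v = 0x000FF000 -> ~v = 0xFFF00FFF, two runs of ones           -> false
//   v = 0xFFFFFFFF -> rejected explicitly above                   -> false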
21001
21002/// isFPImmLegal - Returns true if the target can instruction select the
21003/// specified FP immediate natively. If false, the legalizer will
21004/// materialize the FP immediate as a load from a constant pool.
21005bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
21006 bool ForCodeSize) const {
21007 if (!Subtarget->hasVFP3Base())
21008 return false;
21009 if (VT == MVT::f16 && Subtarget->hasFullFP16())
21010 return ARM_AM::getFP16Imm(Imm) != -1;
21011 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
21012 ARM_AM::getFP32FP16Imm(Imm) != -1)
21013 return true;
21014 if (VT == MVT::f32)
21015 return ARM_AM::getFP32Imm(Imm) != -1;
21016 if (VT == MVT::f64 && Subtarget->hasFP64())
21017 return ARM_AM::getFP64Imm(Imm) != -1;
21018 return false;
21019}
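// Illustrative examples (editorial note, assuming a plain VFPv3 subtarget):
// immediates of the form +/-1.bcdefgh * 2^e with e in [-3, 4] encode directly
// into a VMOV immediate, so values such as 1.0f, 0.5f and -2.0f return true
// here, while 0.1f (not exactly representable) or 0.0f are left to the
// legalizer, which materializes them as constant-pool loads.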
21020
21021/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
21022/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
21023/// specified in the intrinsic calls.
21024bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
21025 const CallInst &I,
21026 MachineFunction &MF,
21027 unsigned Intrinsic) const {
21028 switch (Intrinsic) {
21029 case Intrinsic::arm_neon_vld1:
21030 case Intrinsic::arm_neon_vld2:
21031 case Intrinsic::arm_neon_vld3:
21032 case Intrinsic::arm_neon_vld4:
21033 case Intrinsic::arm_neon_vld2lane:
21034 case Intrinsic::arm_neon_vld3lane:
21035 case Intrinsic::arm_neon_vld4lane:
21036 case Intrinsic::arm_neon_vld2dup:
21037 case Intrinsic::arm_neon_vld3dup:
21038 case Intrinsic::arm_neon_vld4dup: {
21040 // Conservatively set memVT to the entire set of vectors loaded.
21041 auto &DL = I.getDataLayout();
21042 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21043 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21044 Info.ptrVal = I.getArgOperand(0);
21045 Info.offset = 0;
21046 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21047 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21048 // volatile loads with NEON intrinsics not supported
21050 return true;
21051 }
21052 case Intrinsic::arm_neon_vld1x2:
21053 case Intrinsic::arm_neon_vld1x3:
21054 case Intrinsic::arm_neon_vld1x4: {
21056 // Conservatively set memVT to the entire set of vectors loaded.
21057 auto &DL = I.getDataLayout();
21058 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21059 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21060 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
21061 Info.offset = 0;
21062 Info.align.reset();
21063 // volatile loads with NEON intrinsics not supported
21065 return true;
21066 }
21067 case Intrinsic::arm_neon_vst1:
21068 case Intrinsic::arm_neon_vst2:
21069 case Intrinsic::arm_neon_vst3:
21070 case Intrinsic::arm_neon_vst4:
21071 case Intrinsic::arm_neon_vst2lane:
21072 case Intrinsic::arm_neon_vst3lane:
21073 case Intrinsic::arm_neon_vst4lane: {
21075 // Conservatively set memVT to the entire set of vectors stored.
21076 auto &DL = I.getDataLayout();
21077 unsigned NumElts = 0;
21078 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21079 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21080 if (!ArgTy->isVectorTy())
21081 break;
21082 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21083 }
21084 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21085 Info.ptrVal = I.getArgOperand(0);
21086 Info.offset = 0;
21087 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21088 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21089 // volatile stores with NEON intrinsics not supported
21091 return true;
21092 }
21093 case Intrinsic::arm_neon_vst1x2:
21094 case Intrinsic::arm_neon_vst1x3:
21095 case Intrinsic::arm_neon_vst1x4: {
21097 // Conservatively set memVT to the entire set of vectors stored.
21098 auto &DL = I.getDataLayout();
21099 unsigned NumElts = 0;
21100 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21101 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21102 if (!ArgTy->isVectorTy())
21103 break;
21104 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21105 }
21106 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21107 Info.ptrVal = I.getArgOperand(0);
21108 Info.offset = 0;
21109 Info.align.reset();
21110 // volatile stores with NEON intrinsics not supported
21112 return true;
21113 }
21114 case Intrinsic::arm_mve_vld2q:
21115 case Intrinsic::arm_mve_vld4q: {
21117 // Conservatively set memVT to the entire set of vectors loaded.
21118 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21119 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21120 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21121 Info.ptrVal = I.getArgOperand(0);
21122 Info.offset = 0;
21123 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21124 // volatile loads with MVE intrinsics not supported
21126 return true;
21127 }
21128 case Intrinsic::arm_mve_vst2q:
21129 case Intrinsic::arm_mve_vst4q: {
21131 // Conservatively set memVT to the entire set of vectors stored.
21132 Type *VecTy = I.getArgOperand(1)->getType();
21133 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21134 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21135 Info.ptrVal = I.getArgOperand(0);
21136 Info.offset = 0;
21137 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21138 // volatile stores with MVE intrinsics not supported
21140 return true;
21141 }
21142 case Intrinsic::arm_mve_vldr_gather_base:
21143 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21145 Info.ptrVal = nullptr;
21146 Info.memVT = MVT::getVT(I.getType());
21147 Info.align = Align(1);
21149 return true;
21150 }
21151 case Intrinsic::arm_mve_vldr_gather_base_wb:
21152 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21154 Info.ptrVal = nullptr;
21155 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21156 Info.align = Align(1);
21158 return true;
21159 }
21160 case Intrinsic::arm_mve_vldr_gather_offset:
21161 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21163 Info.ptrVal = nullptr;
21164 MVT DataVT = MVT::getVT(I.getType());
21165 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21166 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21167 DataVT.getVectorNumElements());
21168 Info.align = Align(1);
21170 return true;
21171 }
21172 case Intrinsic::arm_mve_vstr_scatter_base:
21173 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21175 Info.ptrVal = nullptr;
21176 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21177 Info.align = Align(1);
21179 return true;
21180 }
21181 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21182 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21184 Info.ptrVal = nullptr;
21185 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21186 Info.align = Align(1);
21188 return true;
21189 }
21190 case Intrinsic::arm_mve_vstr_scatter_offset:
21191 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21193 Info.ptrVal = nullptr;
21194 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21195 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21196 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21197 DataVT.getVectorNumElements());
21198 Info.align = Align(1);
21200 return true;
21201 }
21202 case Intrinsic::arm_ldaex:
21203 case Intrinsic::arm_ldrex: {
21204 auto &DL = I.getDataLayout();
21205 Type *ValTy = I.getParamElementType(0);
21207 Info.memVT = MVT::getVT(ValTy);
21208 Info.ptrVal = I.getArgOperand(0);
21209 Info.offset = 0;
21210 Info.align = DL.getABITypeAlign(ValTy);
21212 return true;
21213 }
21214 case Intrinsic::arm_stlex:
21215 case Intrinsic::arm_strex: {
21216 auto &DL = I.getDataLayout();
21217 Type *ValTy = I.getParamElementType(1);
21219 Info.memVT = MVT::getVT(ValTy);
21220 Info.ptrVal = I.getArgOperand(1);
21221 Info.offset = 0;
21222 Info.align = DL.getABITypeAlign(ValTy);
21224 return true;
21225 }
21226 case Intrinsic::arm_stlexd:
21227 case Intrinsic::arm_strexd:
21229 Info.memVT = MVT::i64;
21230 Info.ptrVal = I.getArgOperand(2);
21231 Info.offset = 0;
21232 Info.align = Align(8);
21234 return true;
21235
21236 case Intrinsic::arm_ldaexd:
21237 case Intrinsic::arm_ldrexd:
21239 Info.memVT = MVT::i64;
21240 Info.ptrVal = I.getArgOperand(0);
21241 Info.offset = 0;
21242 Info.align = Align(8);
21244 return true;
21245
21246 default:
21247 break;
21248 }
21249
21250 return false;
21251}
21252
21253/// Returns true if it is beneficial to convert a load of a constant
21254/// to just the constant itself.
21255bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21256 Type *Ty) const {
21257 assert(Ty->isIntegerTy());
21258
21259 unsigned Bits = Ty->getPrimitiveSizeInBits();
21260 if (Bits == 0 || Bits > 32)
21261 return false;
21262 return true;
21263}
21264
21265bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
21266 unsigned Index) const {
21267 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
21268 return false;
21269
21270 return (Index == 0 || Index == ResVT.getVectorNumElements());
21271}
21272
21273Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21274 ARM_MB::MemBOpt Domain) const {
21275 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21276
21277 // First, if the target has no DMB, see what fallback we can use.
21278 if (!Subtarget->hasDataBarrier()) {
21279 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21280 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21281 // here.
21282 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21283 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
21284 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21285 Builder.getInt32(0), Builder.getInt32(7),
21286 Builder.getInt32(10), Builder.getInt32(5)};
21287 return Builder.CreateCall(MCR, args);
21288 } else {
21289 // Instead of using barriers, atomic accesses on these subtargets use
21290 // libcalls.
21291 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21292 }
21293 } else {
21294 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
21295 // Only a full system barrier exists in the M-class architectures.
21296 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21297 Constant *CDomain = Builder.getInt32(Domain);
21298 return Builder.CreateCall(DMB, CDomain);
21299 }
21300}
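// For reference (editorial note): the MCR fallback above encodes the ARMv6
// CP15 barrier, i.e. "mcr p15, #0, rX, c7, c10, #5", while the common path
// emits DMB with the requested domain (e.g. "dmb ish" or "dmb ishst"); on
// M-class cores only the full-system form "dmb sy" is used.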
21301
21302// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21303Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21304 Instruction *Inst,
21305 AtomicOrdering Ord) const {
21306 switch (Ord) {
21307 case AtomicOrdering::NotAtomic:
21308 case AtomicOrdering::Unordered:
21309 llvm_unreachable("Invalid fence: unordered/non-atomic");
21310 case AtomicOrdering::Monotonic:
21311 case AtomicOrdering::Acquire:
21312 return nullptr; // Nothing to do
21313 case AtomicOrdering::SequentiallyConsistent:
21314 if (!Inst->hasAtomicStore())
21315 return nullptr; // Nothing to do
21316 [[fallthrough]];
21317 case AtomicOrdering::Release:
21318 case AtomicOrdering::AcquireRelease:
21319 if (Subtarget->preferISHSTBarriers())
21320 return makeDMB(Builder, ARM_MB::ISHST);
21321 // FIXME: add a comment with a link to documentation justifying this.
21322 else
21323 return makeDMB(Builder, ARM_MB::ISH);
21324 }
21325 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21326}
21327
21328Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21329 Instruction *Inst,
21330 AtomicOrdering Ord) const {
21331 switch (Ord) {
21332 case AtomicOrdering::NotAtomic:
21333 case AtomicOrdering::Unordered:
21334 llvm_unreachable("Invalid fence: unordered/not-atomic");
21335 case AtomicOrdering::Monotonic:
21336 case AtomicOrdering::Release:
21337 return nullptr; // Nothing to do
21338 case AtomicOrdering::Acquire:
21339 case AtomicOrdering::AcquireRelease:
21340 case AtomicOrdering::SequentiallyConsistent:
21341 return makeDMB(Builder, ARM_MB::ISH);
21342 }
21343 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21344}
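// Summary of the fence placement implemented above (editorial sketch based on
// the cpp0xmappings table cited before emitLeadingFence):
//   load acquire  -> no leading fence, trailing "dmb ish"
//   store release -> leading "dmb ish" (or "dmb ishst"), no trailing fence
//   store seq_cst -> leading and trailing "dmb ish"
//   monotonic     -> no fences at all
// On M-class cores every barrier is widened to "dmb sy" by makeDMB.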
21345
21346// Loads and stores less than 64-bits are already atomic; ones above that
21347// are doomed anyway, so defer to the default libcall and blame the OS when
21348// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21349// anything for those.
21350TargetLoweringBase::AtomicExpansionKind
21351ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
21352 bool has64BitAtomicStore;
21353 if (Subtarget->isMClass())
21354 has64BitAtomicStore = false;
21355 else if (Subtarget->isThumb())
21356 has64BitAtomicStore = Subtarget->hasV7Ops();
21357 else
21358 has64BitAtomicStore = Subtarget->hasV6Ops();
21359
21360 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21361 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21362 : AtomicExpansionKind::None;
21363}
21364
21365// Loads and stores less than 64-bits are already atomic; ones above that
21366// are doomed anyway, so defer to the default libcall and blame the OS when
21367// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21368// anything for those.
21369// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21370// guarantee, see DDI0406C ARM architecture reference manual,
21371// sections A8.8.72-74 LDRD)
21372TargetLoweringBase::AtomicExpansionKind
21373ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
21374 bool has64BitAtomicLoad;
21375 if (Subtarget->isMClass())
21376 has64BitAtomicLoad = false;
21377 else if (Subtarget->isThumb())
21378 has64BitAtomicLoad = Subtarget->hasV7Ops();
21379 else
21380 has64BitAtomicLoad = Subtarget->hasV6Ops();
21381
21382 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21383 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21384 : AtomicExpansionKind::None;
21385}
21386
21387// For the real atomic operations, we have ldrex/strex up to 32 bits,
21388// and up to 64 bits on the non-M profiles
21389TargetLoweringBase::AtomicExpansionKind
21390ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21391 if (AI->isFloatingPointOperation())
21392 return AtomicExpansionKind::CmpXChg;
21393
21394 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21395 bool hasAtomicRMW;
21396 if (Subtarget->isMClass())
21397 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21398 else if (Subtarget->isThumb())
21399 hasAtomicRMW = Subtarget->hasV7Ops();
21400 else
21401 hasAtomicRMW = Subtarget->hasV6Ops();
21402 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21403 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21404 // implement atomicrmw without spilling. If the target address is also on
21405 // the stack and close enough to the spill slot, this can lead to a
21406 // situation where the monitor always gets cleared and the atomic operation
21407 // can never succeed. So at -O0 lower this operation to a CAS loop.
21408 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21409 return AtomicExpansionKind::CmpXChg;
21410 return AtomicExpansionKind::LLSC;
21411 }
21412 return AtomicExpansionKind::None;
21413}
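// Example of the policy above (illustrative): a 32-bit "atomicrmw add" on an
// ARMv7-A or ARMv7-M core is expanded to an ldrex/strex retry loop (LLSC);
// the same operation compiled at -O0 goes through the cmpxchg expansion
// instead, and a 64-bit operation on an M-class core, which has no
// ldrexd/strexd, typically ends up as an atomic libcall.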
21414
21415// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21416// bits, and up to 64 bits on the non-M profiles.
21417TargetLoweringBase::AtomicExpansionKind
21418ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
21419 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21420 // implement cmpxchg without spilling. If the address being exchanged is also
21421 // on the stack and close enough to the spill slot, this can lead to a
21422 // situation where the monitor always gets cleared and the atomic operation
21423 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21424 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21425 bool HasAtomicCmpXchg;
21426 if (Subtarget->isMClass())
21427 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21428 else if (Subtarget->isThumb())
21429 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21430 else
21431 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21432 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21433 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21434 return AtomicExpansionKind::LLSC;
21435 return AtomicExpansionKind::None;
21436}
21437
21438bool ARMTargetLowering::shouldInsertFencesForAtomic(
21439 const Instruction *I) const {
21440 return InsertFencesForAtomic;
21441}
21442
21444 // ROPI/RWPI are not supported currently.
21445 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21446}
21447
21448void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21449 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21450 return TargetLowering::insertSSPDeclarations(M);
21451
21452 // MSVC CRT has a global variable holding security cookie.
21453 M.getOrInsertGlobal("__security_cookie",
21454 PointerType::getUnqual(M.getContext()));
21455
21456 // MSVC CRT has a function to validate security cookie.
21457 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21458 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21459 PointerType::getUnqual(M.getContext()));
21460 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21461 F->addParamAttr(0, Attribute::AttrKind::InReg);
21462}
21463
21464Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
21465 // MSVC CRT has a global variable holding security cookie.
21466 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21467 return M.getGlobalVariable("__security_cookie");
21468 return TargetLowering::getSDagStackGuard(M);
21469}
21470
21471Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21472 // MSVC CRT has a function to validate security cookie.
21473 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21474 return M.getFunction("__security_check_cookie");
21475 return TargetLowering::getSSPStackGuardCheck(M);
21476}
21477
21478bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21479 unsigned &Cost) const {
21480 // If we do not have NEON, vector types are not natively supported.
21481 if (!Subtarget->hasNEON())
21482 return false;
21483
21484 // Floating point values and vector values map to the same register file.
21485 // Therefore, although we could do a store extract of a vector type, this is
21486 // better to leave at float as we have more freedom in the addressing mode for
21487 // those.
21488 if (VectorTy->isFPOrFPVectorTy())
21489 return false;
21490
21491 // If the index is unknown at compile time, this is very expensive to lower
21492 // and it is not possible to combine the store with the extract.
21493 if (!isa<ConstantInt>(Idx))
21494 return false;
21495
21496 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21497 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21498 // We can do a store + vector extract on any vector that fits perfectly in a D
21499 // or Q register.
21500 if (BitWidth == 64 || BitWidth == 128) {
21501 Cost = 0;
21502 return true;
21503 }
21504 return false;
21505}
21506
21507bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21508 return Subtarget->hasV6T2Ops();
21509}
21510
21511bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21512 return Subtarget->hasV6T2Ops();
21513}
21514
21515bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
21516 const Instruction &AndI) const {
21517 if (!Subtarget->hasV7Ops())
21518 return false;
21519
21520 // Sink the `and` instruction only if the mask would fit into a modified
21521 // immediate operand.
21522 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21523 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21524 return false;
21525 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21526 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21527 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21528}
21529
21530TargetLowering::ShiftLegalizationStrategy
21531ARMTargetLowering::preferredShiftLegalizationStrategy(
21532 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21533 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21534 return ShiftLegalizationStrategy::LowerToLibcall;
21535 return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
21536 ExpansionFactor);
21537}
21538
21539Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21540 Value *Addr,
21541 AtomicOrdering Ord) const {
21542 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21543 bool IsAcquire = isAcquireOrStronger(Ord);
21544
21545 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21546 // intrinsic must return {i32, i32} and we have to recombine them into a
21547 // single i64 here.
21548 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21549 Intrinsic::ID Int =
21550 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21551 Function *Ldrex = Intrinsic::getDeclaration(M, Int);
21552
21553 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21554
21555 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21556 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21557 if (!Subtarget->isLittle())
21558 std::swap (Lo, Hi);
21559 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21560 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21561 return Builder.CreateOr(
21562 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21563 }
21564
21565 Type *Tys[] = { Addr->getType() };
21566 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21567 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21568 CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21569
21570 CI->addParamAttr(
21571 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21572 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21573}
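// Rough shape of the IR built above for a 64-bit load-linked on a
// little-endian target (illustrative sketch only):
//   %lohi  = call { i32, i32 } @llvm.arm.ldrexd(ptr %addr)
//   %lo    = extractvalue { i32, i32 } %lohi, 0
//   %hi    = extractvalue { i32, i32 } %lohi, 1
//   %lo64  = zext i32 %lo to i64
//   %hi64  = zext i32 %hi to i64
//   %shl   = shl i64 %hi64, 32
//   %val64 = or i64 %lo64, %shl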
21574
21575void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21576 IRBuilderBase &Builder) const {
21577 if (!Subtarget->hasV7Ops())
21578 return;
21579 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21580 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21581}
21582
21583Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21584 Value *Val, Value *Addr,
21585 AtomicOrdering Ord) const {
21586 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21587 bool IsRelease = isReleaseOrStronger(Ord);
21588
21589 // Since the intrinsics must have legal type, the i64 intrinsics take two
21590 // parameters: "i32, i32". We must marshal Val into the appropriate form
21591 // before the call.
21592 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21593 Intrinsic::ID Int =
21594 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21595 Function *Strex = Intrinsic::getDeclaration(M, Int);
21596 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21597
21598 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21599 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21600 if (!Subtarget->isLittle())
21601 std::swap(Lo, Hi);
21602 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21603 }
21604
21605 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21606 Type *Tys[] = { Addr->getType() };
21607 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21608
21609 CallInst *CI = Builder.CreateCall(
21610 Strex, {Builder.CreateZExtOrBitCast(
21611 Val, Strex->getFunctionType()->getParamType(0)),
21612 Addr});
21613 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21614 Val->getType()));
21615 return CI;
21616}
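// Rough shape of the IR built above for a 64-bit store-conditional on a
// little-endian target (illustrative sketch only):
//   %lo = trunc i64 %val to i32
//   %sh = lshr i64 %val, 32
//   %hi = trunc i64 %sh to i32
//   %ok = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, ptr %addr)
// where %ok is 0 on success and 1 if the exclusive monitor was lost.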
21617
21618
21619bool ARMTargetLowering::alignLoopsWithOptSize() const {
21620 return Subtarget->isMClass();
21621}
21622
21623/// A helper function for determining the number of interleaved accesses we
21624/// will generate when lowering accesses of the given type.
21625unsigned
21626ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21627 const DataLayout &DL) const {
21628 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21629}
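// Worked examples for the computation above (illustrative): a <16 x i32>
// vector is 512 bits, so (512 + 127) / 128 = 4 interleaved accesses; a
// <4 x i16> vector is 64 bits and needs just one.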
21630
21631bool ARMTargetLowering::isLegalInterleavedAccessType(
21632 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21633 const DataLayout &DL) const {
21634
21635 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21636 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21637
21638 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21639 return false;
21640
21641 // Ensure the vector doesn't have f16 elements. Even though we could do an
21642 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21643 // f32.
21644 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21645 return false;
21646 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21647 return false;
21648
21649 // Ensure the number of vector elements is greater than 1.
21650 if (VecTy->getNumElements() < 2)
21651 return false;
21652
21653 // Ensure the element type is legal.
21654 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21655 return false;
21656 // And the alignment is high enough under MVE.
21657 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21658 return false;
21659
21660 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21661 // 128 will be split into multiple interleaved accesses.
21662 if (Subtarget->hasNEON() && VecSize == 64)
21663 return true;
21664 return VecSize % 128 == 0;
21665}
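// Examples of the legality rules above (illustrative): a factor-2 access on
// <8 x i32> (256 bits, a multiple of 128) is accepted on both NEON and MVE;
// a <4 x half> vector is rejected on NEON by the f16 check; factor 3 is
// rejected on MVE; and an under-aligned MVE access (alignment below the
// element size in bytes) is also rejected.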
21666
21667unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
21668 if (Subtarget->hasNEON())
21669 return 4;
21670 if (Subtarget->hasMVEIntegerOps())
21671 return MVEMaxSupportedInterleaveFactor;
21672 return 2;
21673}
21674
21675/// Lower an interleaved load into a vldN intrinsic.
21676///
21677/// E.g. Lower an interleaved load (Factor = 2):
21678/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21679/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21680/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21681///
21682/// Into:
21683/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21684/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21685/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21686bool ARMTargetLowering::lowerInterleavedLoad(
21687 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21688 ArrayRef<unsigned> Indices, unsigned Factor) const {
21689 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21690 "Invalid interleave factor");
21691 assert(!Shuffles.empty() && "Empty shufflevector input");
21692 assert(Shuffles.size() == Indices.size() &&
21693 "Unmatched number of shufflevectors and indices");
21694
21695 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21696 Type *EltTy = VecTy->getElementType();
21697
21698 const DataLayout &DL = LI->getDataLayout();
21699 Align Alignment = LI->getAlign();
21700
21701 // Skip if we do not have NEON and skip illegal vector types. We can
21702 // "legalize" wide vector types into multiple interleaved accesses as long as
21703 // the vector types are divisible by 128.
21704 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21705 return false;
21706
21707 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21708
21709 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21710 // load integer vectors first and then convert to pointer vectors.
21711 if (EltTy->isPointerTy())
21712 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21713
21714 IRBuilder<> Builder(LI);
21715
21716 // The base address of the load.
21717 Value *BaseAddr = LI->getPointerOperand();
21718
21719 if (NumLoads > 1) {
21720 // If we're going to generate more than one load, reset the sub-vector type
21721 // to something legal.
21722 VecTy = FixedVectorType::get(VecTy->getElementType(),
21723 VecTy->getNumElements() / NumLoads);
21724 }
21725
21726 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21727
21728 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21729 if (Subtarget->hasNEON()) {
21730 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21731 Type *Tys[] = {VecTy, PtrTy};
21732 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21733 Intrinsic::arm_neon_vld3,
21734 Intrinsic::arm_neon_vld4};
21735 Function *VldnFunc =
21736 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21737
21738 SmallVector<Value *, 2> Ops;
21739 Ops.push_back(BaseAddr);
21740 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21741
21742 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21743 } else {
21744 assert((Factor == 2 || Factor == 4) &&
21745 "expected interleave factor of 2 or 4 for MVE");
21746 Intrinsic::ID LoadInts =
21747 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21748 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21749 Type *Tys[] = {VecTy, PtrTy};
21750 Function *VldnFunc =
21751 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21752
21753 SmallVector<Value *, 2> Ops;
21754 Ops.push_back(BaseAddr);
21755 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21756 }
21757 };
21758
21759 // Holds sub-vectors extracted from the load intrinsic return values. The
21760 // sub-vectors are associated with the shufflevector instructions they will
21761 // replace.
21762 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21763
21764 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21765 // If we're generating more than one load, compute the base address of
21766 // subsequent loads as an offset from the previous.
21767 if (LoadCount > 0)
21768 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21769 VecTy->getNumElements() * Factor);
21770
21771 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21772
21773 // Replace uses of each shufflevector with the corresponding vector loaded
21774 // by ldN.
21775 for (unsigned i = 0; i < Shuffles.size(); i++) {
21776 ShuffleVectorInst *SV = Shuffles[i];
21777 unsigned Index = Indices[i];
21778
21779 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21780
21781 // Convert the integer vector to pointer vector if the element is pointer.
21782 if (EltTy->isPointerTy())
21783 SubVec = Builder.CreateIntToPtr(
21784 SubVec,
21785 FixedVectorType::get(SV->getType()->getElementType(), VecTy));
21786
21787 SubVecs[SV].push_back(SubVec);
21788 }
21789 }
21790
21791 // Replace uses of the shufflevector instructions with the sub-vectors
21792 // returned by the load intrinsic. If a shufflevector instruction is
21793 // associated with more than one sub-vector, those sub-vectors will be
21794 // concatenated into a single wide vector.
21795 for (ShuffleVectorInst *SVI : Shuffles) {
21796 auto &SubVec = SubVecs[SVI];
21797 auto *WideVec =
21798 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21799 SVI->replaceAllUsesWith(WideVec);
21800 }
21801
21802 return true;
21803}
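// Splitting example (illustrative): for a factor-2 load whose shuffles have
// type <8 x i32> (256 bits), NumLoads is 2, so the code above emits two
// @llvm.arm.neon.vld2 calls returning pairs of <4 x i32>, advances the base
// pointer by NumElements * Factor = 8 elements between them, and then
// concatenates the matching halves before replacing each shufflevector.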
21804
21805/// Lower an interleaved store into a vstN intrinsic.
21806///
21807/// E.g. Lower an interleaved store (Factor = 3):
21808/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21809/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21810/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21811///
21812/// Into:
21813/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21814/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21815/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21816/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21817///
21818/// Note that the new shufflevectors will be removed and we'll only generate one
21819/// vst3 instruction in CodeGen.
21820///
21821/// Example for a more general valid mask (Factor 3). Lower:
21822/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21823/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21824/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21825///
21826/// Into:
21827/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21828/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21829/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21830/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21831bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
21832 ShuffleVectorInst *SVI,
21833 unsigned Factor) const {
21834 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21835 "Invalid interleave factor");
21836
21837 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21838 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21839
21840 unsigned LaneLen = VecTy->getNumElements() / Factor;
21841 Type *EltTy = VecTy->getElementType();
21842 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21843
21844 const DataLayout &DL = SI->getDataLayout();
21845 Align Alignment = SI->getAlign();
21846
21847 // Skip if we do not have NEON and skip illegal vector types. We can
21848 // "legalize" wide vector types into multiple interleaved accesses as long as
21849 // the vector types are divisible by 128.
21850 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21851 return false;
21852
21853 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21854
21855 Value *Op0 = SVI->getOperand(0);
21856 Value *Op1 = SVI->getOperand(1);
21857 IRBuilder<> Builder(SI);
21858
21859 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21860 // vectors to integer vectors.
21861 if (EltTy->isPointerTy()) {
21862 Type *IntTy = DL.getIntPtrType(EltTy);
21863
21864 // Convert to the corresponding integer vector.
21865 auto *IntVecTy =
21866 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21867 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21868 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21869
21870 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21871 }
21872
21873 // The base address of the store.
21874 Value *BaseAddr = SI->getPointerOperand();
21875
21876 if (NumStores > 1) {
21877 // If we're going to generate more than one store, reset the lane length
21878 // and sub-vector type to something legal.
21879 LaneLen /= NumStores;
21880 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21881 }
21882
21883 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21884
21885 auto Mask = SVI->getShuffleMask();
21886
21887 auto createStoreIntrinsic = [&](Value *BaseAddr,
21888 SmallVectorImpl<Value *> &Shuffles) {
21889 if (Subtarget->hasNEON()) {
21890 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21891 Intrinsic::arm_neon_vst3,
21892 Intrinsic::arm_neon_vst4};
21893 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21894 Type *Tys[] = {PtrTy, SubVecTy};
21895
21897 SI->getModule(), StoreInts[Factor - 2], Tys);
21898
21899 SmallVector<Value *, 6> Ops;
21900 Ops.push_back(BaseAddr);
21901 append_range(Ops, Shuffles);
21902 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21903 Builder.CreateCall(VstNFunc, Ops);
21904 } else {
21905 assert((Factor == 2 || Factor == 4) &&
21906 "expected interleave factor of 2 or 4 for MVE");
21907 Intrinsic::ID StoreInts =
21908 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21909 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21910 Type *Tys[] = {PtrTy, SubVecTy};
21911 Function *VstNFunc =
21912 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21913
21914 SmallVector<Value *, 6> Ops;
21915 Ops.push_back(BaseAddr);
21916 append_range(Ops, Shuffles);
21917 for (unsigned F = 0; F < Factor; F++) {
21918 Ops.push_back(Builder.getInt32(F));
21919 Builder.CreateCall(VstNFunc, Ops);
21920 Ops.pop_back();
21921 }
21922 }
21923 };
21924
21925 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21926 // If we're generating more than one store, we compute the base address of
21927 // subsequent stores as an offset from the previous.
21928 if (StoreCount > 0)
21929 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21930 BaseAddr, LaneLen * Factor);
21931
21932 SmallVector<Value *, 4> Shuffles;
21933
21934 // Split the shufflevector operands into sub vectors for the new vstN call.
21935 for (unsigned i = 0; i < Factor; i++) {
21936 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21937 if (Mask[IdxI] >= 0) {
21938 Shuffles.push_back(Builder.CreateShuffleVector(
21939 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21940 } else {
21941 unsigned StartMask = 0;
21942 for (unsigned j = 1; j < LaneLen; j++) {
21943 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21944 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21945 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21946 break;
21947 }
21948 }
21949 // Note: If all elements in a chunk are undefs, StartMask=0!
21950 // Note: Filling undef gaps with random elements is ok, since
21951 // those elements were being written anyway (with undefs).
21952 // In the case of all undefs we're defaulting to using elems from 0
21953 // Note: StartMask cannot be negative, it's checked in
21954 // isReInterleaveMask
21955 Shuffles.push_back(Builder.CreateShuffleVector(
21956 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21957 }
21958 }
21959
21960 createStoreIntrinsic(BaseAddr, Shuffles);
21961 }
21962 return true;
21963}
21964
21965enum HABaseType {
21966 HA_UNKNOWN = 0,
21967 HA_FLOAT,
21968 HA_DOUBLE,
21969 HA_VECT64,
21970 HA_VECT128
21971};
21972
21973static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21974 uint64_t &Members) {
21975 if (auto *ST = dyn_cast<StructType>(Ty)) {
21976 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21977 uint64_t SubMembers = 0;
21978 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21979 return false;
21980 Members += SubMembers;
21981 }
21982 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21983 uint64_t SubMembers = 0;
21984 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21985 return false;
21986 Members += SubMembers * AT->getNumElements();
21987 } else if (Ty->isFloatTy()) {
21988 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21989 return false;
21990 Members = 1;
21991 Base = HA_FLOAT;
21992 } else if (Ty->isDoubleTy()) {
21993 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21994 return false;
21995 Members = 1;
21996 Base = HA_DOUBLE;
21997 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21998 Members = 1;
21999 switch (Base) {
22000 case HA_FLOAT:
22001 case HA_DOUBLE:
22002 return false;
22003 case HA_VECT64:
22004 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
22005 case HA_VECT128:
22006 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
22007 case HA_UNKNOWN:
22008 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
22009 case 64:
22010 Base = HA_VECT64;
22011 return true;
22012 case 128:
22013 Base = HA_VECT128;
22014 return true;
22015 default:
22016 return false;
22017 }
22018 }
22019 }
22020
22021 return (Members > 0 && Members <= 4);
22022}
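// Classification examples (illustrative): struct { float; float; float; } is
// a homogeneous aggregate with Base = HA_FLOAT and Members = 3;
// struct { double; double[3]; } gives Members = 4 and still qualifies;
// struct { float; double; } mixes base types and is rejected; and five floats
// exceed the Members <= 4 limit.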
22023
22024/// Return the correct alignment for the current calling convention.
22025Align ARMTargetLowering::getABIAlignmentForCallingConv(
22026 Type *ArgTy, const DataLayout &DL) const {
22027 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
22028 if (!ArgTy->isVectorTy())
22029 return ABITypeAlign;
22030
22031 // Avoid over-aligning vector parameters. It would require realigning the
22032 // stack and waste space for no real benefit.
22033 return std::min(ABITypeAlign, DL.getStackAlignment());
22034}
22035
22036/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
22037/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
22038/// passing according to AAPCS rules.
22039bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
22040 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
22041 const DataLayout &DL) const {
22042 if (getEffectiveCallingConv(CallConv, isVarArg) !=
22043 CallingConv::ARM_AAPCS_VFP)
22044 return false;
22045
22046 HABaseType Base = HA_UNKNOWN;
22047 uint64_t Members = 0;
22048 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
22049 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
22050
22051 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
22052 return IsHA || IsIntArray;
22053}
22054
22055Register ARMTargetLowering::getExceptionPointerRegister(
22056 const Constant *PersonalityFn) const {
22057 // Platforms which do not use SjLj EH may return values in these registers
22058 // via the personality function.
22059 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
22060}
22061
22062Register ARMTargetLowering::getExceptionSelectorRegister(
22063 const Constant *PersonalityFn) const {
22064 // Platforms which do not use SjLj EH may return values in these registers
22065 // via the personality function.
22066 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
22067}
22068
22069void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
22070 // Update IsSplitCSR in ARMFunctionInfo.
22071 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
22072 AFI->setIsSplitCSR(true);
22073}
22074
22075void ARMTargetLowering::insertCopiesSplitCSR(
22076 MachineBasicBlock *Entry,
22077 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
22078 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
22079 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
22080 if (!IStart)
22081 return;
22082
22083 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
22084 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
22085 MachineBasicBlock::iterator MBBI = Entry->begin();
22086 for (const MCPhysReg *I = IStart; *I; ++I) {
22087 const TargetRegisterClass *RC = nullptr;
22088 if (ARM::GPRRegClass.contains(*I))
22089 RC = &ARM::GPRRegClass;
22090 else if (ARM::DPRRegClass.contains(*I))
22091 RC = &ARM::DPRRegClass;
22092 else
22093 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
22094
22095 Register NewVR = MRI->createVirtualRegister(RC);
22096 // Create copy from CSR to a virtual register.
22097 // FIXME: this currently does not emit CFI pseudo-instructions, it works
22098 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22099 // nounwind. If we want to generalize this later, we may need to emit
22100 // CFI pseudo-instructions.
22101 assert(Entry->getParent()->getFunction().hasFnAttribute(
22102 Attribute::NoUnwind) &&
22103 "Function should be nounwind in insertCopiesSplitCSR!");
22104 Entry->addLiveIn(*I);
22105 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22106 .addReg(*I);
22107
22108 // Insert the copy-back instructions right before the terminator.
22109 for (auto *Exit : Exits)
22110 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22111 TII->get(TargetOpcode::COPY), *I)
22112 .addReg(NewVR);
22113 }
22114}
22115
22116void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
22117 MF.getFrameInfo().computeMaxCallFrameSize(MF);
22118 TargetLoweringBase::finalizeLowering(MF);
22119}
22120
22121bool ARMTargetLowering::isComplexDeinterleavingSupported() const {
22122 return Subtarget->hasMVEIntegerOps();
22123}
22124
22125bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
22126 ComplexDeinterleavingOperation Operation, Type *Ty) const {
22127 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22128 if (!VTy)
22129 return false;
22130
22131 auto *ScalarTy = VTy->getScalarType();
22132 unsigned NumElements = VTy->getNumElements();
22133
22134 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22135 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22136 return false;
22137
22138 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22139 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22140 return Subtarget->hasMVEFloatOps();
22141
22142 if (Operation != ComplexDeinterleavingOperation::CAdd)
22143 return false;
22144
22145 return Subtarget->hasMVEIntegerOps() &&
22146 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22147 ScalarTy->isIntegerTy(32));
22148}
22149
22150Value *ARMTargetLowering::createComplexDeinterleavingIR(
22151 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
22152 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22153 Value *Accumulator) const {
22154
22155 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22156
22157 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22158
22159 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22160
22161 if (TyWidth > 128) {
22162 int Stride = Ty->getNumElements() / 2;
22163 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22164 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22165 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22166 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22167
22168 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22169 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22170 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22171 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22172 Value *LowerSplitAcc = nullptr;
22173 Value *UpperSplitAcc = nullptr;
22174
22175 if (Accumulator) {
22176 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22177 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22178 }
22179
22180 auto *LowerSplitInt = createComplexDeinterleavingIR(
22181 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22182 auto *UpperSplitInt = createComplexDeinterleavingIR(
22183 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22184
22185 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22186 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22187 }
22188
22189 auto *IntTy = Type::getInt32Ty(B.getContext());
22190
22191 ConstantInt *ConstRotation = nullptr;
22192 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22193 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22194
22195 if (Accumulator)
22196 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22197 {ConstRotation, Accumulator, InputB, InputA});
22198 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22199 {ConstRotation, InputB, InputA});
22200 }
22201
22202 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22203 // 1 means the value is not halved.
22204 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22205
22207 ConstRotation = ConstantInt::get(IntTy, 0);
22208 else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22209 ConstRotation = ConstantInt::get(IntTy, 1);
22210
22211 if (!ConstRotation)
22212 return nullptr; // Invalid rotation for arm_mve_vcaddq
22213
22214 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22215 {ConstHalving, ConstRotation, InputA, InputB});
22216 }
22217
22218 return nullptr;
22219}
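// Example of the splitting logic above (illustrative): a 256-bit <8 x float>
// complex multiply is first split into two 128-bit <4 x float> halves, each
// half is lowered to @llvm.arm.mve.vcmulq (or @llvm.arm.mve.vcmlaq when an
// accumulator is present) with the rotation passed as an integer 0..3, and
// the two results are re-joined with a shufflevector.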
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine - Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
Function Alias Analysis Results
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
BlockVerifier::State From
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
std::string Name
uint64_t Size
Symbol * Sym
Definition: ELF_riscv.cpp:479
static bool isSigned(unsigned int Opcode)
#define Check(C,...)
#define op(i)
#define im(i)
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
static Value * LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower ctpop of V before the specified instruction IP.
#define RegName(no)
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
uint64_t High
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static constexpr int Concat[]
Value * RHS
Value * LHS
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1399
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1241
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1627
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1470
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1596
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1555
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned logBase2() const
Definition: APInt.h:1717
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1613
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
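The APInt entries above cover the bit-manipulation helpers this file leans on. A minimal standalone sketch of a few of them, assuming only that the LLVM headers are on the include path:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  void apintDemo() {
    APInt Mask = APInt::getHighBitsSet(32, 8); // 32-bit value with the top 8 bits set
    unsigned Lead = Mask.countl_zero();        // 0: the leading bits are ones
    bool Pow2 = Mask.isPowerOf2();             // false: eight bits are set
    APInt Low = Mask.zextOrTrunc(16);          // keep only the low 16 bits -> 0
    uint64_t Raw = Low.getZExtValue();         // 0
    (void)Lead; (void)Pow2; (void)Raw;
  }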
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:312
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:321
bool hasARMOps() const
Definition: ARMSubtarget.h:265
bool supportsTailCall() const
Definition: ARMSubtarget.h:399
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:298
bool hasVFP4Base() const
Definition: ARMSubtarget.h:273
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
bool useFPVFMx() const
Definition: ARMSubtarget.h:282
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:274
bool isThumb2() const
Definition: ARMSubtarget.h:365
bool isTargetWindows() const
Definition: ARMSubtarget.h:308
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:288
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:200
bool useSjLjEH() const
Definition: ARMSubtarget.h:287
bool isTargetDarwin() const
Definition: ARMSubtarget.h:300
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:208
bool hasVFP2Base() const
Definition: ARMSubtarget.h:271
bool isTargetAndroid() const
Definition: ARMSubtarget.h:350
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:310
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:326
bool hasVFP3Base() const
Definition: ARMSubtarget.h:272
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:286
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:302
bool hasMinSize() const
Definition: ARMSubtarget.h:363
bool isTargetIOS() const
Definition: ARMSubtarget.h:301
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:267
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:433
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:303
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:276
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:304
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:407
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:401
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:331
bool isTargetLinux() const
Definition: ARMSubtarget.h:305
bool useFPVFMx16() const
Definition: ARMSubtarget.h:285
bool isMClass() const
Definition: ARMSubtarget.h:366
unsigned getPrefLoopLogAlignment() const
Definition: ARMSubtarget.h:486
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:280
bool isTargetELF() const
Definition: ARMSubtarget.h:311
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:443
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode represented by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: 'sub y, (xor x, -1)' and 'add (add x, 1), y'. The variant with two add's is IR...
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
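For orientation, this hook is the dispatch point for the static Perform*Combine helpers indexed earlier on this page. A simplified, hedged sketch of the shape of such a dispatcher (not the file's full switch, which handles many more opcodes):

  SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
    switch (N->getOpcode()) {
    case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); // see the entries above
    case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
    case ISD::OR:  return PerformORCombine(N, DCI, Subtarget);
    case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
    case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
    default:       return SDValue(); // no target-specific combine applies
    }
  }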
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
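ArrayRef is the non-owning view used throughout the shuffle-mask helpers above (isVZIPMask, isVEXTMask, and friends). A minimal sketch of the two accessors documented here; the helper name is hypothetical:

  #include "llvm/ADT/ArrayRef.h"
  #include <vector>

  static bool hasAtLeastTwoElts(llvm::ArrayRef<int> Mask) {
    return !Mask.empty() && Mask.size() >= 2; // empty()/size() as documented above
  }
  // Usage: std::vector<int> M{0, 4, 1, 5}; hasAtLeastTwoElts(M); // ArrayRef binds to the vector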
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
bool isFloatingPointOperation() const
Definition: Instructions.h:864
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:94
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
The address of a basic block.
Definition: Constants.h:890
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
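CCState and CCValAssign together drive argument lowering: CCState runs a calling-convention assignment function over the formals, and each resulting CCValAssign says whether a value landed in a register or on the stack. A hedged sketch of that walk (CC_ARM_AAPCS is the AAPCS assignment function declared in ARMCallingConv.h; the helper itself is illustrative, not this file's code):

  static void walkFormalArgs(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
                             const SmallVectorImpl<ISD::InputArg> &Ins, LLVMContext &Ctx) {
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CC, IsVarArg, MF, ArgLocs, Ctx);
    CCInfo.AnalyzeFormalArguments(Ins, CC_ARM_AAPCS); // fills ArgLocs
    for (const CCValAssign &VA : ArgLocs) {
      if (VA.isRegLoc()) {
        // Argument arrives in VA.getLocReg(); copy it into a virtual register.
      } else if (VA.isMemLoc()) {
        // Argument lives at stack offset VA.getLocMemOffset(); emit a load.
      }
    }
  }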
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1542
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1594
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:706
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:195
bool isBigEndian() const
Definition: DataLayout.h:196
Align getStackAlignment() const
Definition: DataLayout.h:228
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:461
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition: DataLayout.cpp:983
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:289
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:842
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
unsigned getNumElements() const
Definition: DerivedTypes.h:582
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
arg_iterator arg_begin()
Definition: Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:686
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:232
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2157
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1896
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2536
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2142
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1454
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1433
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2041
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2514
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2137
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1514
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2173
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
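The IRBuilder calls above are what the atomic and interleaved-access hooks use to emit IR. A small standalone sketch combining a few of them (the packHalves helper is hypothetical):

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  static Value *packHalves(IRBuilderBase &B, Value *Lo16, Value *Hi16) {
    Type *I32 = B.getInt32Ty();
    Value *Lo = B.CreateZExt(Lo16, I32);            // CreateZExt, as documented above
    Value *Hi = B.CreateZExt(Hi16, I32);
    Value *HiShl = B.CreateShl(Hi, B.getInt32(16)); // CreateShl / getInt32
    return B.CreateOr(Lo, HiShl);                   // CreateOr
  }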
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
Value * getPointerOperand()
Definition: Instructions.h:253
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:231
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
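MVT is the machine value type used in all the legality queries above. A minimal sketch of a few of its accessors (the header path shown is the one used in recent LLVM trees):

  #include "llvm/CodeGenTypes/MachineValueType.h"
  using namespace llvm;

  void mvtDemo() {
    MVT V4i32 = MVT::getVectorVT(MVT::i32, 4);
    uint64_t Bits = V4i32.getFixedSizeInBits(); // 128
    MVT Elt = V4i32.getVectorElementType();     // MVT::i32
    bool FP = V4i32.isFloatingPoint();          // false
    (void)Bits; (void)Elt; (void)FP;
  }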
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
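A small sketch of the two MachineFrameInfo entry points most relevant here: CreateFixedObject for incoming stack arguments and CreateStackObject for spills and temporaries. The sizes, offsets, and the helper name are illustrative only:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Alignment.h"
#include <utility>
using namespace llvm;

static std::pair<int, int> createArgAndSpillSlots(MachineFunction &MF,
                                                  int64_t ArgSPOffset) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // A 4-byte incoming argument at a known offset from the incoming SP.
  int ArgFI = MFI.CreateFixedObject(/*Size=*/4, ArgSPOffset,
                                    /*IsImmutable=*/true);
  // An 8-byte, 8-byte-aligned spill slot for a register pair.
  int TmpFI = MFI.CreateStackObject(/*Size=*/8, Align(8),
                                    /*isSpillSlot=*/true);
  return {ArgFI, TmpFI};
}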
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
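These add* helpers are chained onto BuildMI when emitting MachineInstrs. A hedged sketch (the opcode and registers are parameters; nothing here names a specific ARM instruction):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

static void emitRegPlusImm(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator InsertPt,
                           const DebugLoc &DL, const TargetInstrInfo *TII,
                           unsigned AddOpc, Register Dst, Register Src) {
  // Dst = Src + 4: one register def, one register use, one immediate.
  BuildMI(MBB, InsertPt, DL, TII->get(AddOpc), Dst)
      .addReg(Src)
      .addImm(4);
}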
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of a block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
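The flags above are combined when a MachineMemOperand is allocated through MachineFunction::getMachineMemOperand (listed earlier in this index). A minimal sketch, assuming a 32-bit non-volatile load from a fixed stack slot; the helper name is hypothetical:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h" // LLT; header path in current trees
using namespace llvm;

static MachineMemOperand *makeStackLoadMMO(MachineFunction &MF, int FI) {
  // Describe a plain (non-volatile, non-atomic) 32-bit load from slot FI.
  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                                 MachineMemOperand::MOLoad, LLT::scalar(32),
                                 Align(4));
}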
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if this node is undefined (an ISD::UNDEF node).
void setFlags(SDNodeFlags NewFlags)
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:489
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:500
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
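Most of the lowering in this file is phrased in terms of the SelectionDAG builders above. A hedged sketch of the basic pattern: materialize a constant, combine it with an existing value, and chain a store (types, alignment, and the helper name are placeholders):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue emitAddOneAndStore(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue Chain, SDValue X, SDValue Ptr) {
  EVT VT = X.getValueType();
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, One);
  // getStore returns the new chain; callers thread it into later nodes.
  return DAG.getStore(Chain, DL, Sum, Ptr, MachinePointerInfo(), Align(4));
}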
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
void reserve(size_type NumEntries)
Definition: SmallPtrSet.h:113
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
bool empty() const
Definition: SmallSet.h:159
bool erase(const T &V)
Definition: SmallSet.h:207
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const unsigned char * bytes_end() const
Definition: StringRef.h:118
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
const unsigned char * bytes_begin() const
Definition: StringRef.h:115
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
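StringSwitch gives a compact literal-to-value mapping, used in lowering code for things like inline-asm constraint and register-name lookups. A small sketch with an illustrative mapping:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

static int regNameToEncoding(StringRef Name) {
  return StringSwitch<int>(Name)
      .Case("r0", 0)
      .Case("r1", 1)
      .Case("sp", 13)
      .Case("lr", 14)
      .Case("pc", 15)
      .Default(-1); // unknown name
}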
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:361
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
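These TargetLoweringBase hooks are what a backend's TargetLowering constructor calls to describe its registers and legalization rules before computeRegisterProperties derives the rest. A hedged sketch with a hypothetical ExampleTLI; the specific choices are placeholders, not the ARM backend's actual configuration:

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

class ExampleTLI : public TargetLowering {
public:
  ExampleTLI(const TargetMachine &TM, const TargetRegisterClass *GPR,
             const TargetRegisterInfo *TRI)
      : TargetLowering(TM) {
    addRegisterClass(MVT::i32, GPR);                 // i32 lives in GPRs
    setOperationAction(ISD::SDIV, MVT::i32, Expand); // no native sdiv
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal);
    setTruncStoreAction(MVT::i64, MVT::i32, Expand);
    setMinFunctionAlignment(Align(4));
    setSchedulingPreference(Sched::RegPressure);
    computeRegisterProperties(TRI);                  // derive the rest
  }
};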
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:399
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:500
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:634
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:248
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
Type * getArrayElementType() const
Definition: Type.h:399
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:212
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Static base relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
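The ARM_AM immediate queries above report whether a constant has a compact encoding. A small sketch (the wrapper is hypothetical; the include path is the in-tree one used by the ARM backend):

#include "MCTargetDesc/ARMAddressingModes.h"
using namespace llvm;

static bool isCheapImmediate(unsigned Imm, bool IsThumb2) {
  // getSOImmVal / getT2SOImmVal return -1 when the value has no
  // shifter-operand encoding; such constants need movw/movt or a load.
  int Encoded = IsThumb2 ? ARM_AM::getT2SOImmVal(Imm)
                         : ARM_AM::getSOImmVal(Imm);
  return Encoded != -1;
}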
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:826
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1194
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1190
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1066
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1355
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1440
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:511
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1337
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1223
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1339
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1309
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1340
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1070
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1089
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:157
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:840
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1425
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1301
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1093
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1439
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:963
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1335
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1336
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:996
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1480
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:935
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1115
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1422
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1289
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1426
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1056
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:787
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:980
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1145
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1338
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1124
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1305
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1441
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1219
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1434
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1084
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1061
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1333
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1279
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:906
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1316
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1341
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1028
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1109
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:938
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1442
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1331
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1047
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1332
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1250
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1423
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1330
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1001
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:919
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:112
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:905
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1214
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1138
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
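The two CondCode helpers above are simple predicate-table lookups; a minimal sketch of their behavior, assuming LLVM development headers are available and the program is linked against LLVM's SelectionDAG library:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

using namespace llvm;

int main() {
  // !(X < Y) is (X >= Y) for signed integer compares.
  assert(ISD::getSetCCInverse(ISD::SETLT, MVT::i32) == ISD::SETGE);
  // (Y < X) is the same predicate as (X > Y).
  assert(ISD::getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT);
  return 0;
}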
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1636
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
If N is a BUILD_VECTOR node whose elements are all the same constant or undefined, returns true and returns that value in SplatValue.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1552
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1554
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
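A hedged sketch of Intrinsic::getDeclaration (above) for an ARM intrinsic; the helper name and the choice of llvm.arm.mve.vctp32 are illustrative only:

#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Declare (or reuse) the MVE vctp32 intrinsic in module M.
static Function *getVctp32Decl(Module &M) {
  // arm_mve_vctp32 is not overloaded, so no type list is needed.
  return Intrinsic::getDeclaration(&M, Intrinsic::arm_mve_vctp32);
}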
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
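A self-contained sketch of the PatternMatch helpers listed above: it builds a tiny function with IRBuilder, then matches fneg(insertelement(undef, %s, 0)) to recover the inserted scalar. Module, function, and value names are invented for the example.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::PatternMatch;

int main() {
  LLVMContext Ctx;
  Module M("patmatch-demo", Ctx);
  auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  auto *FnTy = FunctionType::get(Type::getVoidTy(Ctx),
                                 {VecTy, Type::getFloatTy(Ctx)}, false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "demo", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);

  // Build: %ins = insertelement <4 x float> undef, float %s, i32 0
  //        %neg = fneg <4 x float> %ins
  Value *Ins = B.CreateInsertElement(UndefValue::get(VecTy), F->getArg(1),
                                     B.getInt32(0));
  Value *Neg = B.CreateFNeg(Ins);
  B.CreateRetVoid();

  // Match fneg(insertelement(undef, %s, 0)) and pull out %s.
  Value *Scalar = nullptr;
  if (match(Neg, m_FNeg(m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()))))
    errs() << "matched scalar: " << *Scalar << "\n";
  return 0;
}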
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
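The RTLIB getters above map a (source type, result type) pair to a runtime-library call, or UNKNOWN_LIBCALL when no such call exists. A small sketch, under the assumption that the getters are reachable through LLVM's runtime-libcall header (named RuntimeLibcalls.h in older trees, RuntimeLibcallUtil.h in newer ones):

#include "llvm/CodeGen/RuntimeLibcalls.h" // header name varies by LLVM version
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

using namespace llvm;

int main() {
  // f64 -> i32 signed conversion has a dedicated libcall...
  assert(RTLIB::getFPTOSINT(MVT::f64, MVT::i32) != RTLIB::UNKNOWN_LIBCALL);
  // ...while a nonsensical pairing reports UNKNOWN_LIBCALL so the caller
  // must fall back to a different lowering.
  assert(RTLIB::getFPTOSINT(MVT::i32, MVT::f64) == RTLIB::UNKNOWN_LIBCALL);
  return 0;
}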
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
@ Read
Definition: CodeGenData.h:102
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:255
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2431
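The range helpers above (drop_begin, find, all_of, enumerate) are thin wrappers over the standard algorithms; a minimal sketch, assuming LLVM's ADT headers:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>

int main() {
  llvm::SmallVector<int, 4> V = {3, 5, 7, 9};

  // all_of / find take whole ranges instead of begin()/end() pairs.
  assert(llvm::all_of(V, [](int X) { return X % 2 == 1; }));
  assert(llvm::find(V, 7) != V.end());

  // drop_begin skips the first N elements; enumerate pairs index and value.
  int Sum = 0;
  for (const auto &En : llvm::enumerate(llvm::drop_begin(V, 1)))
    Sum += En.index() * En.value(); // 0*5 + 1*7 + 2*9 = 25
  assert(Sum == 25);
  return 0;
}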
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:267
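countr_one and isMask_32 (just above) are often used together: a mask check followed by a count of the run length. A small sketch:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  // 0x00FF is a contiguous run of ones starting at bit 0.
  assert(llvm::isMask_32(0x00FFu));
  assert(!llvm::isMask_32(0x0FF0u));
  // countr_one reports that run's length directly.
  assert(llvm::countr_one(0x00FFu) == 8);
  return 0;
}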
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2098
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1541
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:279
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
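A sketch tying together the power-of-two and shifted-mask predicates above with the bit-counting helpers (countr_zero, countl_zero, Log2_32) listed earlier:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isPowerOf2_32(64u) && !llvm::isPowerOf2_32(0u));
  assert(llvm::isPowerOf2_64(1ull << 40));
  assert(llvm::Log2_32(64u) == 6);
  // 0x0FF0 is a run of ones shifted up by 4: a "shifted mask".
  assert(llvm::isShiftedMask_32(0x0FF0u));
  assert(llvm::countr_zero(0x0FF0u) == 4);  // trailing zeros give the shift
  assert(llvm::countl_zero(0x0FF0u) == 20); // leading zeros of the 32-bit value
  return 0;
}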
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1312
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ MVEVMVNModImm
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
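isUIntN/isIntN and alignTo (above) are simple width and rounding checks; for example:

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isUIntN(8, 255) && !llvm::isUIntN(8, 256));
  assert(llvm::isIntN(8, -128) && !llvm::isIntN(8, 128));
  // Round a byte count up to the next multiple of a 16-byte alignment.
  assert(llvm::alignTo(30, llvm::Align(16)) == 32);
  return 0;
}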
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
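The three atomic-ordering predicates referenced in this file (isStrongerThanMonotonic, isReleaseOrStronger, isAcquireOrStronger) are comparisons on the AtomicOrdering lattice; a quick sketch:

#include "llvm/Support/AtomicOrdering.h"
#include <cassert>

using namespace llvm;

int main() {
  assert(isAcquireOrStronger(AtomicOrdering::SequentiallyConsistent));
  assert(!isReleaseOrStronger(AtomicOrdering::Monotonic));
  assert(isStrongerThanMonotonic(AtomicOrdering::Acquire));
  return 0;
}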
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1928
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
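predOps and condCodeOp (above, together with BuildMI) append the predicate and optional-S-bit operands that most predicable ARM MachineInstrs require. A hedged fragment, assuming it lives inside the in-tree ARM backend where ARMBaseInstrInfo.h and the ARM opcode enum are visible; the helper name and its parameter list are invented for illustration:

#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

static void emitUnconditionalAdd(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MII,
                                 const DebugLoc &DL,
                                 const ARMBaseInstrInfo &TII, Register Dst,
                                 Register LHS, Register RHS) {
  // ADDrr Dst, LHS, RHS, pred:AL, predreg:noreg, cc_out:noreg
  BuildMI(MBB, MII, DL, TII.get(ARM::ADDrr), Dst)
      .addReg(LHS)
      .addReg(RHS)
      .add(predOps(ARMCC::AL)) // always-execute predicate
      .add(condCodeOp());      // optional S bit left clear
}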
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
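createSequentialMask (above) builds the index vector used when lowering shuffles: NumInts consecutive indices starting at Start, followed by NumUndefs sentinel -1 lanes. A small sketch, assuming the declaration in llvm/Analysis/VectorUtils.h:

#include "llvm/Analysis/VectorUtils.h"
#include <cassert>

int main() {
  // Indices 2,3,4,5 followed by two undef (-1) lanes.
  llvm::SmallVector<int, 16> Mask = llvm::createSequentialMask(2, 4, 2);
  assert(Mask.size() == 6);
  assert(Mask[0] == 2 && Mask[3] == 5 && Mask[4] == -1 && Mask[5] == -1);
  return 0;
}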
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
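The SDValue constant predicates above (isNullConstant, isOneConstant, isAllOnesConstant) are the usual way DAG combines test for identity operands. A hedged fragment; the helper function is invented for illustration:

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Returns true when RHS is the identity element for the given opcode,
// so Opcode(X, RHS) can be folded to X.
static bool rhsIsIdentityFor(unsigned Opcode, SDValue RHS) {
  switch (Opcode) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::OR:
  case ISD::XOR:
    return isNullConstant(RHS);    // 0 is the identity
  case ISD::MUL:
    return isOneConstant(RHS);     // 1 is the identity
  case ISD::AND:
    return isAllOnesConstant(RHS); // all-ones is the identity
  default:
    return false;
  }
}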
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:258
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:381
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:275
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:291
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:341
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:449
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:350
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:456
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:275
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:204
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isFixedLengthVector() const
Definition: ValueTypes.h:178
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:314
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:299
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:439
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:199
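A sketch exercising the EVT queries above on a concrete 128-bit vector type; only LLVM headers are assumed:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // <4 x f32>: a simple, fixed-length, 128-bit floating-point vector.
  EVT V4F32 = EVT::getVectorVT(Ctx, MVT::f32, 4);
  assert(V4F32.isVector() && V4F32.isFixedLengthVector());
  assert(V4F32.is128BitVector() && V4F32.isFloatingPoint());
  assert(V4F32.getVectorNumElements() == 4);
  assert(V4F32.getScalarSizeInBits() == 32);

  // Convert the element type to an integer of the same width: <4 x i32>.
  EVT V4I32 = V4F32.changeVectorElementTypeToInteger();
  assert(V4I32.getVectorElementType() == MVT::i32);

  // Halve the element count: <2 x f32>, a 64-bit vector.
  EVT V2F32 = V4F32.getHalfNumVectorElementsVT(Ctx);
  assert(V2F32.is64BitVector());
  return 0;
}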
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:290
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition: KnownBits.h:333
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:797
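A minimal sketch of the KnownBits operations above, starting from a fully known constant:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

int main() {
  // Start from a fully known 8-bit constant and widen it.
  KnownBits K = KnownBits::makeConstant(APInt(8, 0x2A));
  assert(!K.isUnknown() && K.getBitWidth() == 8);

  KnownBits Z = K.zext(16); // high bits become known zero
  KnownBits S = K.sext(16); // high bits copy the (clear) sign bit
  assert(Z.getConstant() == S.getConstant());

  // Adding two known constants yields a known constant sum.
  KnownBits Sum = KnownBits::add(Z, KnownBits::makeConstant(APInt(16, 1)));
  assert(Sum.getConstant() == 43);

  // intersectWith keeps only the bits known in *both* operands.
  KnownBits Common = Z.intersectWith(Sum);
  assert(!Common.isUnknown());
  return 0;
}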
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
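The CallLoweringInfo setters above are normally chained when a lowering routine forwards an operation to a library call. A hedged fragment, under the assumption that the caller supplies the DAG, chain, callee, and argument list; the function name is invented:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

static std::pair<SDValue, SDValue>
emitLibcall(SelectionDAG &DAG, const TargetLowering &TLI, SDValue Chain,
            SDValue Callee, Type *RetTy, TargetLowering::ArgListTy &&Args,
            const SDLoc &dl) {
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
      .setSExtResult();        // sign-extend a small integer result
  return TLI.LowerCallTo(CLI); // returns {result, out-chain}
}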
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)