ARMISelLowering.cpp
1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
87#include "llvm/IR/Type.h"
88#include "llvm/IR/User.h"
89#include "llvm/IR/Value.h"
90#include "llvm/MC/MCInstrDesc.h"
93#include "llvm/MC/MCSchedule.h"
100#include "llvm/Support/Debug.h"
108#include <algorithm>
109#include <cassert>
110#include <cstdint>
111#include <cstdlib>
112#include <iterator>
113#include <limits>
114#include <optional>
115#include <tuple>
116#include <utility>
117#include <vector>
118
119using namespace llvm;
120using namespace llvm::PatternMatch;
121
122#define DEBUG_TYPE "arm-isel"
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127STATISTIC(NumConstpoolPromoted,
128 "Number of constants with their storage promoted into constant pools");
129
130static cl::opt<bool>
131ARMInterworking("arm-interworking", cl::Hidden,
132 cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 cl::init(true));
134
136 "arm-promote-constant", cl::Hidden,
137 cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 "constant pools"),
139 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
141 "arm-promote-constant-max-size", cl::Hidden,
142 cl::desc("Maximum size of constant to promote into a constant pool"),
143 cl::init(64));
145 "arm-promote-constant-max-total", cl::Hidden,
146 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 cl::init(128));
148
150MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 cl::init(2));
153
154// The APCS parameter registers.
155static const MCPhysReg GPRArgRegs[] = {
156 ARM::R0, ARM::R1, ARM::R2, ARM::R3
157};
158
160 SelectionDAG &DAG, const SDLoc &DL) {
162 assert(Arg.ArgVT.bitsLT(MVT::i32));
163 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
164 SDValue Ext =
166 MVT::i32, Trunc);
167 return Ext;
168}
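// (Used by LowerCallResult below for CMSE non-secure calls: results narrower
// than 32 bits are truncated back to the original argument type and then
// re-extended to i32, since the non-secure callee cannot be trusted to have
// performed the extension mandated by the ABI.)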
169
170void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
171 if (VT != PromotedLdStVT) {
173 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
174
176 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
177 }
178
179 MVT ElemTy = VT.getVectorElementType();
180 if (ElemTy != MVT::f64)
184 if (ElemTy == MVT::i32) {
189 } else {
194 }
203 if (VT.isInteger()) {
207 }
208
209 // Neon does not support vector divide/remainder operations.
218
219 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
220 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
222 setOperationAction(Opcode, VT, Legal);
223 if (!VT.isFloatingPoint())
224 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
225 setOperationAction(Opcode, VT, Legal);
226}
227
228void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
229 addRegisterClass(VT, &ARM::DPRRegClass);
230 addTypeForNEON(VT, MVT::f64);
231}
232
233void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
234 addRegisterClass(VT, &ARM::DPairRegClass);
235 addTypeForNEON(VT, MVT::v2f64);
236}
237
238void ARMTargetLowering::setAllExpand(MVT VT) {
239 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
240 setOperationAction(Opc, VT, Expand);
241
242 // We support these really simple operations even on types where all
243 // the actual arithmetic has to be broken down into simpler
244 // operations or turned into library calls.
249}
250
251void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
252 LegalizeAction Action) {
253 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
254 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
255 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
256}
257
258void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
259 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
260
261 for (auto VT : IntTypes) {
262 addRegisterClass(VT, &ARM::MQPRRegClass);
292
293 // No native support for these.
303
304 // Vector reductions
314
315 if (!HasMVEFP) {
320 } else {
323 }
324
325 // Pre and Post inc are supported on loads and stores
326 for (unsigned im = (unsigned)ISD::PRE_INC;
332 }
333 }
334
335 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
336 for (auto VT : FloatTypes) {
337 addRegisterClass(VT, &ARM::MQPRRegClass);
338 if (!HasMVEFP)
339 setAllExpand(VT);
340
341 // These are legal or custom whether we have MVE.fp or not
354
355 // Pre and Post inc are supported on loads and stores
356 for (unsigned im = (unsigned)ISD::PRE_INC;
362 }
363
364 if (HasMVEFP) {
372
373 // No native support for these.
388 }
389 }
390
391 // Custom expand smaller-than-legal vector reductions to prevent false zero
392 // items being added.
401
402 // We 'support' these types up to bitcast/load/store level, regardless of
403 // MVE integer-only / float support. Only FP data processing on the FP
404 // vector types is inhibited at the integer-only level.
405 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
406 for (auto VT : LongTypes) {
407 addRegisterClass(VT, &ARM::MQPRRegClass);
408 setAllExpand(VT);
414 }
416
417 // We can do bitwise operations on v2i64 vectors
418 setOperationAction(ISD::AND, MVT::v2i64, Legal);
419 setOperationAction(ISD::OR, MVT::v2i64, Legal);
420 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
421
422 // It is legal to extload from v8i8 to v8i16, and from v4i8 or v4i16 to v4i32.
423 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
424 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
425 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
426
427 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
433
434 // Some truncating stores are legal too.
435 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
436 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
437 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
438
439 // Pre and Post inc on these are legal, given the correct extends
440 for (unsigned im = (unsigned)ISD::PRE_INC;
442 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
447 }
448 }
449
450 // Predicate types
451 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
452 for (auto VT : pTypes) {
453 addRegisterClass(VT, &ARM::VCCRRegClass);
468
469 if (!HasMVEFP) {
474 }
475 }
479 setOperationAction(ISD::OR, MVT::v2i1, Expand);
485
494}
495
497 const ARMSubtarget &STI)
498 : TargetLowering(TM), Subtarget(&STI) {
499 RegInfo = Subtarget->getRegisterInfo();
500 Itins = Subtarget->getInstrItineraryData();
501
504
505 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
506 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
507 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
508 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
509 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
510 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
512 }
513
514 if (Subtarget->isTargetMachO()) {
515 // Uses VFP for Thumb libfuncs if available.
516 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
517 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
518 static const struct {
519 const RTLIB::Libcall Op;
520 const char * const Name;
521 const ISD::CondCode Cond;
522 } LibraryCalls[] = {
523 // Single-precision floating-point arithmetic.
524 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
525 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
526 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
527 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
528
529 // Double-precision floating-point arithmetic.
530 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
531 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
532 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
533 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
534
535 // Single-precision comparisons.
536 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
537 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
538 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
539 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
540 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
541 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
542 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
543
544 // Double-precision comparisons.
545 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
546 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
547 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
548 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
549 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
550 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
551 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
552
553 // Floating-point to integer conversions.
554 // i64 conversions are done via library routines even when generating VFP
555 // instructions, so use the same ones.
556 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
557 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
558 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
559 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
560
561 // Conversions between floating types.
562 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
563 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
564
565 // Integer to floating-point conversions.
566 // i64 conversions are done via library routines even when generating VFP
567 // instructions, so use the same ones.
568 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
569 // e.g., __floatunsidf vs. __floatunssidfvfp.
570 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
571 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
572 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
573 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
574 };
575
576 for (const auto &LC : LibraryCalls) {
577 setLibcallName(LC.Op, LC.Name);
578 if (LC.Cond != ISD::SETCC_INVALID)
579 setCmpLibcallCC(LC.Op, LC.Cond);
580 }
581 }
582 }
583
584 // RTLIB
585 if (Subtarget->isAAPCS_ABI() &&
586 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
587 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
588 static const struct {
589 const RTLIB::Libcall Op;
590 const char * const Name;
591 const CallingConv::ID CC;
592 const ISD::CondCode Cond;
593 } LibraryCalls[] = {
594 // Double-precision floating-point arithmetic helper functions
595 // RTABI chapter 4.1.2, Table 2
596 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600
601 // Double-precision floating-point comparison helper functions
602 // RTABI chapter 4.1.2, Table 3
603 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
604 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
605 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
606 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
607 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
608 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
609 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
610
611 // Single-precision floating-point arithmetic helper functions
612 // RTABI chapter 4.1.2, Table 4
613 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617
618 // Single-precision floating-point comparison helper functions
619 // RTABI chapter 4.1.2, Table 5
620 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
621 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
622 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
623 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
624 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
625 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
626 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
627
628 // Floating-point to integer conversions.
629 // RTABI chapter 4.1.2, Table 6
630 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638
639 // Conversions between floating types.
640 // RTABI chapter 4.1.2, Table 7
641 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
642 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644
645 // Integer to floating-point conversions.
646 // RTABI chapter 4.1.2, Table 8
647 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655
656 // Long long helper functions
657 // RTABI chapter 4.2, Table 9
658 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662
663 // Integer division functions
664 // RTABI chapter 4.3.1
665 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
670 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
671 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
672 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
673 };
674
675 for (const auto &LC : LibraryCalls) {
676 setLibcallName(LC.Op, LC.Name);
677 setLibcallCallingConv(LC.Op, LC.CC);
678 if (LC.Cond != ISD::SETCC_INVALID)
679 setCmpLibcallCC(LC.Op, LC.Cond);
680 }
681
682 // EABI dependent RTLIB
683 if (TM.Options.EABIVersion == EABI::EABI4 ||
684 TM.Options.EABIVersion == EABI::EABI5) {
685 static const struct {
686 const RTLIB::Libcall Op;
687 const char *const Name;
688 const CallingConv::ID CC;
689 const ISD::CondCode Cond;
690 } MemOpsLibraryCalls[] = {
691 // Memory operations
692 // RTABI chapter 4.3.4
693 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
694 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
695 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
696 };
697
698 for (const auto &LC : MemOpsLibraryCalls) {
699 setLibcallName(LC.Op, LC.Name);
700 setLibcallCallingConv(LC.Op, LC.CC);
701 if (LC.Cond != ISD::SETCC_INVALID)
702 setCmpLibcallCC(LC.Op, LC.Cond);
703 }
704 }
705 }
706
707 if (Subtarget->isTargetWindows()) {
708 static const struct {
709 const RTLIB::Libcall Op;
710 const char * const Name;
711 const CallingConv::ID CC;
712 } LibraryCalls[] = {
713 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
717 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
718 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
719 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
720 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
721 };
722
723 for (const auto &LC : LibraryCalls) {
724 setLibcallName(LC.Op, LC.Name);
725 setLibcallCallingConv(LC.Op, LC.CC);
726 }
727 }
728
729 // Use divmod compiler-rt calls for iOS 5.0 and later.
730 if (Subtarget->isTargetMachO() &&
731 !(Subtarget->isTargetIOS() &&
732 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
733 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
734 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
735 }
736
737 // The half <-> float conversion functions are always soft-float on
738 // non-watchos platforms, but are needed for some targets which use a
739 // hard-float calling convention by default.
740 if (!Subtarget->isTargetWatchABI()) {
741 if (Subtarget->isAAPCS_ABI()) {
742 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
743 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
744 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
745 } else {
746 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
747 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
748 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
749 }
750 }
751
752 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
753 // a __gnu_ prefix (which is the default).
754 if (Subtarget->isTargetAEABI()) {
755 static const struct {
756 const RTLIB::Libcall Op;
757 const char * const Name;
758 const CallingConv::ID CC;
759 } LibraryCalls[] = {
760 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
761 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
762 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
763 };
764
765 for (const auto &LC : LibraryCalls) {
766 setLibcallName(LC.Op, LC.Name);
767 setLibcallCallingConv(LC.Op, LC.CC);
768 }
769 }
770
771 if (Subtarget->isThumb1Only())
772 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
773 else
774 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
775
776 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
777 Subtarget->hasFPRegs()) {
778 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
779 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
780
785
786 if (!Subtarget->hasVFP2Base())
787 setAllExpand(MVT::f32);
788 if (!Subtarget->hasFP64())
789 setAllExpand(MVT::f64);
790 }
791
792 if (Subtarget->hasFullFP16()) {
793 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
796
799 }
800
801 if (Subtarget->hasBF16()) {
802 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
803 setAllExpand(MVT::bf16);
804 if (!Subtarget->hasFullFP16())
806 }
807
809 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
810 setTruncStoreAction(VT, InnerVT, Expand);
811 addAllExtLoads(VT, InnerVT, Expand);
812 }
813
816
818 }
819
822
825
826 if (Subtarget->hasMVEIntegerOps())
827 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
828
829 // Combine low-overhead loop intrinsics so that we can lower i1 types.
830 if (Subtarget->hasLOB()) {
832 }
833
834 if (Subtarget->hasNEON()) {
835 addDRTypeForNEON(MVT::v2f32);
836 addDRTypeForNEON(MVT::v8i8);
837 addDRTypeForNEON(MVT::v4i16);
838 addDRTypeForNEON(MVT::v2i32);
839 addDRTypeForNEON(MVT::v1i64);
840
841 addQRTypeForNEON(MVT::v4f32);
842 addQRTypeForNEON(MVT::v2f64);
843 addQRTypeForNEON(MVT::v16i8);
844 addQRTypeForNEON(MVT::v8i16);
845 addQRTypeForNEON(MVT::v4i32);
846 addQRTypeForNEON(MVT::v2i64);
847
848 if (Subtarget->hasFullFP16()) {
849 addQRTypeForNEON(MVT::v8f16);
850 addDRTypeForNEON(MVT::v4f16);
851 }
852
853 if (Subtarget->hasBF16()) {
854 addQRTypeForNEON(MVT::v8bf16);
855 addDRTypeForNEON(MVT::v4bf16);
856 }
857 }
858
859 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
860 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
861 // none of Neon, MVE or VFP supports any arithmetic operations on it.
862 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
863 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
864 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
865 // FIXME: Code duplication: FDIV and FREM are expanded always, see
866 // ARMTargetLowering::addTypeForNEON method for details.
867 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
868 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
869 // FIXME: Create unittest.
870 // In other words, find a way to make "copysign" appear in the DAG with
871 // vector operands.
873 // FIXME: Code duplication: SETCC has custom operation action, see
874 // ARMTargetLowering::addTypeForNEON method for details.
876 // FIXME: Create unittest for FNEG and for FABS.
877 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
878 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
880 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
881 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
882 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
883 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
884 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
887 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
890 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
896 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
897 }
898
899 if (Subtarget->hasNEON()) {
900 // The same applies to v4f32. But keep in mind that vadd, vsub, vmul are natively
901 // supported for v4f32.
903 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
904 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
905 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
906 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
907 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
910 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
918
919 // Mark v2f32 intrinsics.
921 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
922 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
923 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
924 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
925 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
928 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
936
937 // Neon does not support some operations on v1i64 and v2i64 types.
938 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
939 // Custom handling for some quad-vector types to detect VMULL.
940 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
941 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
942 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
943 // Custom handling for some vector types to avoid expensive expansions
944 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
946 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
948 // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
949 // a destination type that is wider than the source, nor does
950 // it have an FP_TO_[SU]INT instruction with a narrower destination than
951 // its source.
960
963
964 // NEON does not have a single-instruction CTPOP for vectors with element
965 // types wider than 8 bits. However, custom lowering can leverage the
966 // v8i8/v16i8 vcnt instruction.
973
974 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
975 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
976
977 // NEON does not have a single-instruction CTTZ for vectors.
979 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
980 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
981 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
982
983 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
984 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
985 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
986 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
987
992
997
1001 }
1002
1003 // NEON only has FMA instructions as of VFP4.
1004 if (!Subtarget->hasVFP4Base()) {
1005 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
1006 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1007 }
1008
1011
1012 // It is legal to extload from v4i8 to v4i16 or v4i32.
1013 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1014 MVT::v2i32}) {
1019 }
1020 }
1021
1022 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1023 MVT::v4i32}) {
1028 }
1029 }
1030
1031 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1038 }
1039 if (Subtarget->hasMVEIntegerOps()) {
1042 ISD::SETCC});
1043 }
1044 if (Subtarget->hasMVEFloatOps()) {
1046 }
1047
1048 if (!Subtarget->hasFP64()) {
1049 // When targeting a floating-point unit with only single-precision
1050 // operations, f64 is legal for the few double-precision instructions which
1051 // are present. However, no double-precision operations other than moves,
1052 // loads and stores are provided by the hardware.
1090 }
1091
1092 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1095 if (Subtarget->hasFullFP16()) {
1098 }
1099 }
1100
1101 if (!Subtarget->hasFP16()) {
1104 }
1105
1107
1108 // ARM does not have floating-point extending loads.
1109 for (MVT VT : MVT::fp_valuetypes()) {
1110 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1111 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1112 }
1113
1114 // ... or truncating stores
1115 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1116 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1117 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1118
1119 // ARM does not have an i1 sign-extending load.
1120 for (MVT VT : MVT::integer_valuetypes())
1121 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1122
1123 // ARM supports all 4 flavors of integer indexed load / store.
1124 if (!Subtarget->isThumb1Only()) {
1125 for (unsigned im = (unsigned)ISD::PRE_INC;
1127 setIndexedLoadAction(im, MVT::i1, Legal);
1128 setIndexedLoadAction(im, MVT::i8, Legal);
1129 setIndexedLoadAction(im, MVT::i16, Legal);
1130 setIndexedLoadAction(im, MVT::i32, Legal);
1131 setIndexedStoreAction(im, MVT::i1, Legal);
1132 setIndexedStoreAction(im, MVT::i8, Legal);
1133 setIndexedStoreAction(im, MVT::i16, Legal);
1134 setIndexedStoreAction(im, MVT::i32, Legal);
1135 }
1136 } else {
1137 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1140 }
1141
1146
1149 if (Subtarget->hasDSP()) {
1158 }
1159 if (Subtarget->hasBaseDSP()) {
1162 }
1163
1164 // i64 operation support.
1167 if (Subtarget->isThumb1Only()) {
1170 }
1171 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1172 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1174
1184
1185 // MVE lowers 64-bit shifts to lsll and lsrl
1186 // assuming that ISD::SRL and SRA of i64 are already marked custom
1187 if (Subtarget->hasMVEIntegerOps())
1189
1190 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1191 if (Subtarget->isThumb1Only()) {
1195 }
1196
1197 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1199
1200 // ARM does not have ROTL.
1205 }
1208 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1211 }
1212
1213 // @llvm.readcyclecounter requires the Performance Monitors extension.
1214 // Default to the 0 expansion on unsupported platforms.
1215 // FIXME: Technically there are older ARM CPUs that have
1216 // implementation-specific ways of obtaining this information.
1217 if (Subtarget->hasPerfMon())
1219
1220 // BSWAP is only available from ARMv6 onwards.
1221 if (!Subtarget->hasV6Ops())
1223
1224 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1225 : Subtarget->hasDivideInARMMode();
1226 if (!hasDivide) {
1227 // These are expanded into libcalls if the CPU doesn't have a hardware divider.
1230 }
1231
1232 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1235
1238 }
1239
1242
1243 // Register based DivRem for AEABI (RTABI 4.2)
1244 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1245 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1246 Subtarget->isTargetWindows()) {
1249 HasStandaloneRem = false;
1250
1251 if (Subtarget->isTargetWindows()) {
1252 const struct {
1253 const RTLIB::Libcall Op;
1254 const char * const Name;
1255 const CallingConv::ID CC;
1256 } LibraryCalls[] = {
1257 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1258 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1259 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1260 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1261
1262 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1263 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1264 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1265 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1266 };
1267
1268 for (const auto &LC : LibraryCalls) {
1269 setLibcallName(LC.Op, LC.Name);
1270 setLibcallCallingConv(LC.Op, LC.CC);
1271 }
1272 } else {
1273 const struct {
1274 const RTLIB::Libcall Op;
1275 const char * const Name;
1276 const CallingConv::ID CC;
1277 } LibraryCalls[] = {
1278 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1279 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1280 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1281 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1282
1283 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1284 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1285 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1286 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1287 };
1288
1289 for (const auto &LC : LibraryCalls) {
1290 setLibcallName(LC.Op, LC.Name);
1291 setLibcallCallingConv(LC.Op, LC.CC);
1292 }
1293 }
1294
1299 } else {
1302 }
1303
1308
1309 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1311
1312 // Use the default implementation.
1314 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1316 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1319
1320 if (Subtarget->isTargetWindows())
1322 else
1324
1325 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1326 // the default expansion.
1327 InsertFencesForAtomic = false;
1328 if (Subtarget->hasAnyDataBarrier() &&
1329 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1330 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1331 // to ldrex/strex loops already.
1333 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1335
1336 // On v8, we have particularly efficient implementations of atomic fences
1337 // if they can be combined with nearby atomic loads and stores.
1338 if (!Subtarget->hasAcquireRelease() ||
1339 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1340 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1341 InsertFencesForAtomic = true;
1342 }
1343 } else {
1344 // If there's anything we can use as a barrier, go through custom lowering
1345 // for ATOMIC_FENCE.
1346 // If the target has DMB in Thumb mode, fences can be inserted.
1347 if (Subtarget->hasDataBarrier())
1348 InsertFencesForAtomic = true;
1349
1351 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1352
1353 // Set them all for libcall, which will force libcalls.
1366 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1367 // Unordered/Monotonic case.
1368 if (!InsertFencesForAtomic) {
1371 }
1372 }
1373
1374 // Compute supported atomic widths.
1375 if (Subtarget->isTargetLinux() ||
1376 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1377 // For targets where __sync_* routines are reliably available, we use them
1378 // if necessary.
1379 //
1380 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1381 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1382 //
1383 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1384 // such targets should provide __sync_* routines, which use the ARM mode
1385 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1386 // encoding; see ARMISD::MEMBARRIER_MCR.)
1388 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1389 Subtarget->hasForced32BitAtomics()) {
1390 // Cortex-M cores (other than Cortex-M0) have 32-bit atomics.
1392 } else {
1393 // We can't assume anything about other targets; just use libatomic
1394 // routines.
1396 }
1397
1399
1401
1402 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1403 if (!Subtarget->hasV6Ops()) {
1406 }
1408
1409 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1410 !Subtarget->isThumb1Only()) {
1411 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1412 // iff target supports vfp2.
1422 }
1423
1424 // We want to custom lower some of our intrinsics.
1429 if (Subtarget->useSjLjEH())
1430 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1431
1441 if (Subtarget->hasFullFP16()) {
1445 }
1446
1448
1451 if (Subtarget->hasFullFP16())
1455 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1456
1457 // We don't support sin/cos/fmod/copysign/pow
1466 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1467 !Subtarget->isThumb1Only()) {
1470 }
1473
1474 if (!Subtarget->hasVFP4Base()) {
1477 }
1478
1479 // Various VFP goodness
1480 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1481 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1482 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1485 }
1486
1487 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1488 if (!Subtarget->hasFP16()) {
1491 }
1492
1493 // Strict floating-point comparisons need custom lowering.
1500 }
1501
1502 // Use __sincos_stret if available.
1503 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1504 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1507 }
1508
1509 // FP-ARMv8 implements a lot of rounding-like FP operations.
1510 if (Subtarget->hasFPARMv8Base()) {
1519 if (Subtarget->hasNEON()) {
1524 }
1525
1526 if (Subtarget->hasFP64()) {
1535 }
1536 }
1537
1538 // FP16 values often need to be promoted to call library functions.
1539 if (Subtarget->hasFullFP16()) {
1554
1556 }
1557
1558 if (Subtarget->hasNEON()) {
1559 // vmin and vmax aren't available in a scalar form, so we can use
1560 // a NEON instruction with an undef lane instead.
1569
1570 if (Subtarget->hasFullFP16()) {
1575
1580 }
1581 }
1582
1583 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1584 // it, but it's just a wrapper around ldexp.
1585 if (Subtarget->isTargetWindows()) {
1587 if (isOperationExpand(Op, MVT::f32))
1588 setOperationAction(Op, MVT::f32, Promote);
1589 }
1590
1591 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1592 // isn't legal.
1594 if (isOperationExpand(Op, MVT::f16))
1595 setOperationAction(Op, MVT::f16, Promote);
1596
1597 // We have target-specific dag combine patterns for the following nodes:
1598 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1601
1602 if (Subtarget->hasMVEIntegerOps())
1604
1605 if (Subtarget->hasV6Ops())
1607 if (Subtarget->isThumb1Only())
1609 // Attempt to lower smin/smax to ssat/usat
1610 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1611 Subtarget->isThumb2()) {
1613 }
1614
1616
1617 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1618 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1620 else
1622
1623 //// temporary - rewrite interface to use type
1626 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1628 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1630
1631 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1632 // are at least 4 bytes aligned.
1634
1635 // Prefer likely predicted branches to selects on out-of-order cores.
1636 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1637
1638 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1640
1641 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1642}
1643
1645 return Subtarget->useSoftFloat();
1646}
1647
1648// FIXME: It might make sense to define the representative register class as the
1649// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1650// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1651// SPR's representative would be DPR_VFP2. This should work well if register
1652// pressure tracking were modified such that a register use would increment the
1653// pressure of the register class's representative and all of its super
1654// classes' representatives transitively. We have not implemented this because
1655// of the difficulty prior to coalescing of modeling operand register classes
1656// due to the common occurrence of cross class copies and subregister insertions
1657// and extractions.
1658std::pair<const TargetRegisterClass *, uint8_t>
1660 MVT VT) const {
1661 const TargetRegisterClass *RRC = nullptr;
1662 uint8_t Cost = 1;
1663 switch (VT.SimpleTy) {
1664 default:
1666 // Use DPR as representative register class for all floating point
1667 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1668 // the cost is 1 for both f32 and f64.
1669 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1670 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1671 RRC = &ARM::DPRRegClass;
1672 // When NEON is used for SP, only half of the register file is available
1673 // because operations that define both SP and DP results will be constrained
1674 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1675 // coalescing by double-counting the SP regs. See the FIXME above.
1676 if (Subtarget->useNEONForSinglePrecisionFP())
1677 Cost = 2;
1678 break;
1679 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1680 case MVT::v4f32: case MVT::v2f64:
1681 RRC = &ARM::DPRRegClass;
1682 Cost = 2;
1683 break;
1684 case MVT::v4i64:
1685 RRC = &ARM::DPRRegClass;
1686 Cost = 4;
1687 break;
1688 case MVT::v8i64:
1689 RRC = &ARM::DPRRegClass;
1690 Cost = 8;
1691 break;
1692 }
1693 return std::make_pair(RRC, Cost);
1694}
1695
1696const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1697#define MAKE_CASE(V) \
1698 case V: \
1699 return #V;
1700 switch ((ARMISD::NodeType)Opcode) {
1702 break;
1905#undef MAKE_CASE
1906 }
1907 return nullptr;
1908}
1909
1911 EVT VT) const {
1912 if (!VT.isVector())
1913 return getPointerTy(DL);
1914
1915 // MVE has a predicate register.
1916 if ((Subtarget->hasMVEIntegerOps() &&
1917 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1918 VT == MVT::v16i8)) ||
1919 (Subtarget->hasMVEFloatOps() &&
1920 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1921 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1923}
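// For MVE vector types, boolean (setcc-style) results use one i1 per lane,
// e.g. v4i1 for a v4i32 compare, matching the VCCR predicate register class
// registered for the predicate types in addMVEVectorTypes.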
1924
1925/// getRegClassFor - Return the register class that should be used for the
1926/// specified value type.
1927const TargetRegisterClass *
1928ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1929 (void)isDivergent;
1930 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1931 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1932 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1933 // MVE Q registers.
1934 if (Subtarget->hasNEON()) {
1935 if (VT == MVT::v4i64)
1936 return &ARM::QQPRRegClass;
1937 if (VT == MVT::v8i64)
1938 return &ARM::QQQQPRRegClass;
1939 }
1940 if (Subtarget->hasMVEIntegerOps()) {
1941 if (VT == MVT::v4i64)
1942 return &ARM::MQQPRRegClass;
1943 if (VT == MVT::v8i64)
1944 return &ARM::MQQQQPRRegClass;
1945 }
1947}
1948
1949// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1950// source/dest is aligned and the copy size is large enough. We therefore want
1951// to align such objects passed to memory intrinsics.
1953 Align &PrefAlign) const {
1954 if (!isa<MemIntrinsic>(CI))
1955 return false;
1956 MinSize = 8;
1957 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1958 // cycle faster than 4-byte aligned LDM.
1959 PrefAlign =
1960 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1961 return true;
1962}
1963
1964// Create a fast isel object.
1965FastISel *
1967 const TargetLibraryInfo *libInfo) const {
1968 return ARM::createFastISel(funcInfo, libInfo);
1969}
1970
1972 unsigned NumVals = N->getNumValues();
1973 if (!NumVals)
1974 return Sched::RegPressure;
1975
1976 for (unsigned i = 0; i != NumVals; ++i) {
1977 EVT VT = N->getValueType(i);
1978 if (VT == MVT::Glue || VT == MVT::Other)
1979 continue;
1980 if (VT.isFloatingPoint() || VT.isVector())
1981 return Sched::ILP;
1982 }
1983
1984 if (!N->isMachineOpcode())
1985 return Sched::RegPressure;
1986
1987 // Loads are scheduled for latency even if the instruction itinerary
1988 // is not available.
1989 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1990 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1991
1992 if (MCID.getNumDefs() == 0)
1993 return Sched::RegPressure;
1994 if (!Itins->isEmpty() &&
1995 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
1996 return Sched::ILP;
1997
1998 return Sched::RegPressure;
1999}
2000
2001//===----------------------------------------------------------------------===//
2002// Lowering Code
2003//===----------------------------------------------------------------------===//
2004
2005static bool isSRL16(const SDValue &Op) {
2006 if (Op.getOpcode() != ISD::SRL)
2007 return false;
2008 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2009 return Const->getZExtValue() == 16;
2010 return false;
2011}
2012
2013static bool isSRA16(const SDValue &Op) {
2014 if (Op.getOpcode() != ISD::SRA)
2015 return false;
2016 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2017 return Const->getZExtValue() == 16;
2018 return false;
2019}
2020
2021static bool isSHL16(const SDValue &Op) {
2022 if (Op.getOpcode() != ISD::SHL)
2023 return false;
2024 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2025 return Const->getZExtValue() == 16;
2026 return false;
2027}
2028
2029// Check for a signed 16-bit value. We special-case SRA because it makes it
2030// simpler when also looking for SRAs that aren't sign-extending a
2031// smaller value. Without the check, we'd need to take extra care with
2032// checking order for some operations.
2033static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2034 if (isSRA16(Op))
2035 return isSHL16(Op.getOperand(0));
2036 return DAG.ComputeNumSignBits(Op) == 17;
2037}
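// For example, (sra (shl X, 16), 16) is treated as a signed 16-bit value, as
// is any value the DAG can prove has at least 17 sign bits.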
2038
2039/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2041 switch (CC) {
2042 default: llvm_unreachable("Unknown condition code!");
2043 case ISD::SETNE: return ARMCC::NE;
2044 case ISD::SETEQ: return ARMCC::EQ;
2045 case ISD::SETGT: return ARMCC::GT;
2046 case ISD::SETGE: return ARMCC::GE;
2047 case ISD::SETLT: return ARMCC::LT;
2048 case ISD::SETLE: return ARMCC::LE;
2049 case ISD::SETUGT: return ARMCC::HI;
2050 case ISD::SETUGE: return ARMCC::HS;
2051 case ISD::SETULT: return ARMCC::LO;
2052 case ISD::SETULE: return ARMCC::LS;
2053 }
2054}
2055
2056/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2058 ARMCC::CondCodes &CondCode2) {
2059 CondCode2 = ARMCC::AL;
2060 switch (CC) {
2061 default: llvm_unreachable("Unknown FP condition!");
2062 case ISD::SETEQ:
2063 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2064 case ISD::SETGT:
2065 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2066 case ISD::SETGE:
2067 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2068 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2069 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2070 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2071 case ISD::SETO: CondCode = ARMCC::VC; break;
2072 case ISD::SETUO: CondCode = ARMCC::VS; break;
2073 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2074 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2075 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2076 case ISD::SETLT:
2077 case ISD::SETULT: CondCode = ARMCC::LT; break;
2078 case ISD::SETLE:
2079 case ISD::SETULE: CondCode = ARMCC::LE; break;
2080 case ISD::SETNE:
2081 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2082 }
2083}
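// CondCode2 supplies a second condition for the cases that need two checks
// (e.g. SETONE -> MI or GT, SETUEQ -> EQ or VS); it stays ARMCC::AL when a
// single condition suffices.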
2084
2085//===----------------------------------------------------------------------===//
2086// Calling Convention Implementation
2087//===----------------------------------------------------------------------===//
2088
2089/// getEffectiveCallingConv - Get the effective calling convention, taking into
2090/// account presence of floating point hardware and calling convention
2091/// limitations, such as support for variadic functions.
2093ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2094 bool isVarArg) const {
2095 switch (CC) {
2096 default:
2097 report_fatal_error("Unsupported calling convention");
2100 case CallingConv::GHC:
2102 return CC;
2108 case CallingConv::Swift:
2111 case CallingConv::C:
2112 case CallingConv::Tail:
2113 if (!Subtarget->isAAPCS_ABI())
2114 return CallingConv::ARM_APCS;
2115 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2116 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2117 !isVarArg)
2119 else
2121 case CallingConv::Fast:
2123 if (!Subtarget->isAAPCS_ABI()) {
2124 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2125 return CallingConv::Fast;
2126 return CallingConv::ARM_APCS;
2127 } else if (Subtarget->hasVFP2Base() &&
2128 !Subtarget->isThumb1Only() && !isVarArg)
2130 else
2132 }
2133}
2134
2136 bool isVarArg) const {
2137 return CCAssignFnForNode(CC, false, isVarArg);
2138}
2139
2141 bool isVarArg) const {
2142 return CCAssignFnForNode(CC, true, isVarArg);
2143}
2144
2145/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2146/// CallingConvention.
2147CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2148 bool Return,
2149 bool isVarArg) const {
2150 switch (getEffectiveCallingConv(CC, isVarArg)) {
2151 default:
2152 report_fatal_error("Unsupported calling convention");
2154 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2156 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2158 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2159 case CallingConv::Fast:
2160 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2161 case CallingConv::GHC:
2162 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2164 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2166 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2168 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2169 }
2170}
2171
2172SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2173 MVT LocVT, MVT ValVT, SDValue Val) const {
2174 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2175 Val);
2176 if (Subtarget->hasFullFP16()) {
2177 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2178 } else {
2179 Val = DAG.getNode(ISD::TRUNCATE, dl,
2180 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2181 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2182 }
2183 return Val;
2184}
2185
2186SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2187 MVT LocVT, MVT ValVT,
2188 SDValue Val) const {
2189 if (Subtarget->hasFullFP16()) {
2190 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2191 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2192 } else {
2193 Val = DAG.getNode(ISD::BITCAST, dl,
2194 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2195 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2196 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2197 }
2198 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2199}
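// MoveToHPR/MoveFromHPR shuttle f16/bf16 values through the low 16 bits of a
// 32-bit location: with +fullfp16 they use the VMOVhr/VMOVrh nodes directly,
// otherwise they go via an integer bitcast plus truncate/zero-extend.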
2200
2201/// LowerCallResult - Lower the result values of a call into the
2202/// appropriate copies out of appropriate physical registers.
2203SDValue ARMTargetLowering::LowerCallResult(
2204 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2205 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2206 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2207 SDValue ThisVal, bool isCmseNSCall) const {
2208 // Assign locations to each value returned by this call.
2210 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2211 *DAG.getContext());
2212 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2213
2214 // Copy all of the result registers out of their specified physreg.
2215 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2216 CCValAssign VA = RVLocs[i];
2217
2218 // Pass 'this' value directly from the argument to return value, to avoid
2219 // reg unit interference
2220 if (i == 0 && isThisReturn) {
2221 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2222 "unexpected return calling convention register assignment");
2223 InVals.push_back(ThisVal);
2224 continue;
2225 }
2226
2227 SDValue Val;
2228 if (VA.needsCustom() &&
2229 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2230 // Handle f64 or half of a v2f64.
2231 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2232 InGlue);
2233 Chain = Lo.getValue(1);
2234 InGlue = Lo.getValue(2);
2235 VA = RVLocs[++i]; // skip ahead to next loc
2236 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2237 InGlue);
2238 Chain = Hi.getValue(1);
2239 InGlue = Hi.getValue(2);
2240 if (!Subtarget->isLittle())
2241 std::swap (Lo, Hi);
2242 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2243
2244 if (VA.getLocVT() == MVT::v2f64) {
2245 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2246 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2247 DAG.getConstant(0, dl, MVT::i32));
2248
2249 VA = RVLocs[++i]; // skip ahead to next loc
2250 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2251 Chain = Lo.getValue(1);
2252 InGlue = Lo.getValue(2);
2253 VA = RVLocs[++i]; // skip ahead to next loc
2254 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2255 Chain = Hi.getValue(1);
2256 InGlue = Hi.getValue(2);
2257 if (!Subtarget->isLittle())
2258 std::swap (Lo, Hi);
2259 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2260 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2261 DAG.getConstant(1, dl, MVT::i32));
2262 }
2263 } else {
2264 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2265 InGlue);
2266 Chain = Val.getValue(1);
2267 InGlue = Val.getValue(2);
2268 }
2269
2270 switch (VA.getLocInfo()) {
2271 default: llvm_unreachable("Unknown loc info!");
2272 case CCValAssign::Full: break;
2273 case CCValAssign::BCvt:
2274 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2275 break;
2276 }
2277
2278 // f16 arguments have their size extended to 4 bytes and passed as if they
2279 // had been copied to the LSBs of a 32-bit register.
2280 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2281 if (VA.needsCustom() &&
2282 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2283 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2284
2285 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2286 // is less than 32 bits must be sign- or zero-extended after the call for
2287 // security reasons. Although the ABI mandates an extension done by the
2288 // callee, the latter cannot be trusted to follow the rules of the ABI.
2289 const ISD::InputArg &Arg = Ins[VA.getValNo()];
2290 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
2291 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
2292 Val = handleCMSEValue(Val, Arg, DAG, dl);
2293
2294 InVals.push_back(Val);
2295 }
2296
2297 return Chain;
2298}
2299
2300std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2301 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2302 bool IsTailCall, int SPDiff) const {
2303 SDValue DstAddr;
2304 MachinePointerInfo DstInfo;
2305 int32_t Offset = VA.getLocMemOffset();
2307
2308 if (IsTailCall) {
2309 Offset += SPDiff;
2310 auto PtrVT = getPointerTy(DAG.getDataLayout());
2311 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2312 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2313 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2314 DstInfo =
2316 } else {
2317 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2318 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2319 StackPtr, PtrOff);
2320 DstInfo =
2322 }
2323
2324 return std::make_pair(DstAddr, DstInfo);
2325}
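// For tail calls the outgoing argument is addressed through a fixed stack
// object at Offset + SPDiff (the byte offset of the call's argument area from
// the callee's); otherwise it is addressed as StackPtr plus the location's
// stack offset in the outgoing argument area.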
2326
2327void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2328 SDValue Chain, SDValue &Arg,
2329 RegsToPassVector &RegsToPass,
2330 CCValAssign &VA, CCValAssign &NextVA,
2331 SDValue &StackPtr,
2332 SmallVectorImpl<SDValue> &MemOpChains,
2333 bool IsTailCall,
2334 int SPDiff) const {
2335 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2336 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2337 unsigned id = Subtarget->isLittle() ? 0 : 1;
2338 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2339
2340 if (NextVA.isRegLoc())
2341 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2342 else {
2343 assert(NextVA.isMemLoc());
2344 if (!StackPtr.getNode())
2345 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2347
2348 SDValue DstAddr;
2349 MachinePointerInfo DstInfo;
2350 std::tie(DstAddr, DstInfo) =
2351 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2352 MemOpChains.push_back(
2353 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2354 }
2355}
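// An f64 argument (or one half of a v2f64) is split by VMOVRRD into two i32
// values; endianness decides which half is passed in the first register, and
// a half that has no register assigned is stored to its stack slot instead.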
2356
2357static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2358 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2359 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2360 }
2361
2362/// LowerCall - Lowering a call into a callseq_start <-
2363/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2364/// nodes.
2365SDValue
2366ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2367 SmallVectorImpl<SDValue> &InVals) const {
2368 SelectionDAG &DAG = CLI.DAG;
2369 SDLoc &dl = CLI.DL;
2370 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2371 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2372 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2373 SDValue Chain = CLI.Chain;
2374 SDValue Callee = CLI.Callee;
2375 bool &isTailCall = CLI.IsTailCall;
2376 CallingConv::ID CallConv = CLI.CallConv;
2377 bool doesNotRet = CLI.DoesNotReturn;
2378 bool isVarArg = CLI.IsVarArg;
2379
2380 MachineFunction &MF = DAG.getMachineFunction();
2381 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2382 MachineFunction::CallSiteInfo CSInfo;
2383 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2384 bool isThisReturn = false;
2385 bool isCmseNSCall = false;
2386 bool isSibCall = false;
2387 bool PreferIndirect = false;
2388 bool GuardWithBTI = false;
2389
2390 // Analyze operands of the call, assigning locations to each operand.
2391 SmallVector<CCValAssign, 16> ArgLocs;
2392 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2393 *DAG.getContext());
2394 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2395
2396 // Lower 'returns_twice' calls to a pseudo-instruction.
2397 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2398 !Subtarget->noBTIAtReturnTwice())
2399 GuardWithBTI = AFI->branchTargetEnforcement();
2400
2401 // Determine whether this is a non-secure function call.
2402 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2403 isCmseNSCall = true;
2404
2405 // Disable tail calls if they're not supported.
2406 if (!Subtarget->supportsTailCall())
2407 isTailCall = false;
2408
2409 // For both the non-secure calls and the returns from a CMSE entry function,
2410 // the function needs to do some extra work after the call, or before the
2411 // return, respectively, thus it cannot end with a tail call
2412 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2413 isTailCall = false;
2414
2415 if (isa<GlobalAddressSDNode>(Callee)) {
2416 // If we're optimizing for minimum size and the function is called three or
2417 // more times in this block, we can improve codesize by calling indirectly
2418 // as BLXr has a 16-bit encoding.
2419 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2420 if (CLI.CB) {
2421 auto *BB = CLI.CB->getParent();
2422 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2423 count_if(GV->users(), [&BB](const User *U) {
2424 return isa<Instruction>(U) &&
2425 cast<Instruction>(U)->getParent() == BB;
2426 }) > 2;
2427 }
2428 }
2429 if (isTailCall) {
2430 // Check if it's really possible to do a tail call.
2431 isTailCall =
2432 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2433
2434 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2435 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2436 isSibCall = true;
2437
2438 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2439 // detected sibcalls.
2440 if (isTailCall)
2441 ++NumTailCalls;
2442 }
2443
2444 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2445 report_fatal_error("failed to perform tail call elimination on a call "
2446 "site marked musttail");
2447
2448 // Get a count of how many bytes are to be pushed on the stack.
2449 unsigned NumBytes = CCInfo.getStackSize();
2450
2451 // SPDiff is the byte offset of the call's argument area from the callee's.
2452 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2453 // by this amount for a tail call. In a sibling call it must be 0 because the
2454 // caller will deallocate the entire stack and the callee still expects its
2455 // arguments to begin at SP+0. Completely unused for non-tail calls.
2456 int SPDiff = 0;
2457
2458 if (isTailCall && !isSibCall) {
2459 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2460 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2461
2462 // Since callee will pop argument stack as a tail call, we must keep the
2463 // popped size 16-byte aligned.
2464 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2465 NumBytes = alignTo(NumBytes, StackAlign);
2466
2467 // SPDiff will be negative if this tail call requires more space than we
2468 // would automatically have in our incoming argument space. Positive if we
2469 // can actually shrink the stack.
2470 SPDiff = NumReusableBytes - NumBytes;
2471
2472 // If this call requires more stack than we have available from
2473 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2474 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2475 AFI->setArgRegsSaveSize(-SPDiff);
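// Illustrative example (not from the original source): if the caller's
// incoming argument area holds 16 bytes (NumReusableBytes == 16) and this
// tail call needs 24 bytes after alignment (NumBytes == 24), then
// SPDiff == -8 and setArgRegsSaveSize(8) above reserves the extra space.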
2476 }
2477
2478 if (isSibCall) {
2479 // For sibling tail calls, memory operands are available in our caller's stack.
2480 NumBytes = 0;
2481 } else {
2482 // Adjust the stack pointer for the new arguments...
2483 // These operations are automatically eliminated by the prolog/epilog pass
2484 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2485 }
2486
2487 SDValue StackPtr =
2488 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2489
2490 RegsToPassVector RegsToPass;
2491 SmallVector<SDValue, 8> MemOpChains;
2492
2493 // During a tail call, stores to the argument area must happen after all of
2494 // the function's incoming arguments have been loaded because they may alias.
2495 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2496 // there's no point in doing so repeatedly so this tracks whether that's
2497 // happened yet.
2498 bool AfterFormalArgLoads = false;
2499
2500 // Walk the register/memloc assignments, inserting copies/loads. In the case
2501 // of tail call optimization, arguments are handled later.
2502 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2503 i != e;
2504 ++i, ++realArgIdx) {
2505 CCValAssign &VA = ArgLocs[i];
2506 SDValue Arg = OutVals[realArgIdx];
2507 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2508 bool isByVal = Flags.isByVal();
2509
2510 // Promote the value if needed.
2511 switch (VA.getLocInfo()) {
2512 default: llvm_unreachable("Unknown loc info!");
2513 case CCValAssign::Full: break;
2514 case CCValAssign::SExt:
2515 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2516 break;
2517 case CCValAssign::ZExt:
2518 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2519 break;
2520 case CCValAssign::AExt:
2521 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2522 break;
2523 case CCValAssign::BCvt:
2524 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2525 break;
2526 }
2527
2528 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2529 Chain = DAG.getStackArgumentTokenFactor(Chain);
2530 AfterFormalArgLoads = true;
2531 }
2532
2533 // f16 arguments have their size extended to 4 bytes and passed as if they
2534 // had been copied to the LSBs of a 32-bit register.
2535 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2536 if (VA.needsCustom() &&
2537 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2538 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2539 } else {
2540 // f16 arguments could have been extended prior to argument lowering.
2541 // Mask such arguments if this is a CMSE nonsecure call.
2542 auto ArgVT = Outs[realArgIdx].ArgVT;
2543 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2544 auto LocBits = VA.getLocVT().getSizeInBits();
2545 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2546 SDValue Mask =
2547 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2548 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2549 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2550 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2551 }
2552 }
2553
2554 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2555 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2556 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2557 DAG.getConstant(0, dl, MVT::i32));
2558 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2559 DAG.getConstant(1, dl, MVT::i32));
2560
2561 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2562 StackPtr, MemOpChains, isTailCall, SPDiff);
2563
2564 VA = ArgLocs[++i]; // skip ahead to next loc
2565 if (VA.isRegLoc()) {
2566 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2567 StackPtr, MemOpChains, isTailCall, SPDiff);
2568 } else {
2569 assert(VA.isMemLoc());
2570 SDValue DstAddr;
2571 MachinePointerInfo DstInfo;
2572 std::tie(DstAddr, DstInfo) =
2573 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2574 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2575 }
2576 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2577 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2578 StackPtr, MemOpChains, isTailCall, SPDiff);
2579 } else if (VA.isRegLoc()) {
2580 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2581 Outs[0].VT == MVT::i32) {
2582 assert(VA.getLocVT() == MVT::i32 &&
2583 "unexpected calling convention register assignment");
2584 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2585 "unexpected use of 'returned'");
2586 isThisReturn = true;
2587 }
2588 const TargetOptions &Options = DAG.getTarget().Options;
2589 if (Options.EmitCallSiteInfo)
2590 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2591 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2592 } else if (isByVal) {
2593 assert(VA.isMemLoc());
2594 unsigned offset = 0;
2595
2596 // True if this byval aggregate will be split between registers
2597 // and memory.
2598 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2599 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2600
2601 if (CurByValIdx < ByValArgsCount) {
2602
2603 unsigned RegBegin, RegEnd;
2604 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2605
2606 EVT PtrVT =
2608 unsigned int i, j;
2609 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2610 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2611 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2612 SDValue Load =
2613 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2614 DAG.InferPtrAlign(AddArg));
2615 MemOpChains.push_back(Load.getValue(1));
2616 RegsToPass.push_back(std::make_pair(j, Load));
2617 }
2618
2619 // If the parameter size exceeds the register area, the "offset" value
2620 // helps us calculate the stack slot for the remaining part properly.
2621 offset = RegEnd - RegBegin;
2622
2623 CCInfo.nextInRegsParam();
2624 }
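// Illustrative example (not from the original source): a 12-byte byval split
// across R2-R3 gives offset == 2, so 8 bytes were loaded into registers above
// and the ARMISD::COPY_STRUCT_BYVAL emitted below copies the remaining
// 12 - 8 == 4 bytes to the stack slot.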
2625
2626 if (Flags.getByValSize() > 4*offset) {
2627 auto PtrVT = getPointerTy(DAG.getDataLayout());
2628 SDValue Dst;
2629 MachinePointerInfo DstInfo;
2630 std::tie(Dst, DstInfo) =
2631 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2632 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2633 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2634 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2635 MVT::i32);
2636 SDValue AlignNode =
2637 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2638
2639 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2640 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2641 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2642 Ops));
2643 }
2644 } else {
2645 assert(VA.isMemLoc());
2646 SDValue DstAddr;
2647 MachinePointerInfo DstInfo;
2648 std::tie(DstAddr, DstInfo) =
2649 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2650
2651 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2652 MemOpChains.push_back(Store);
2653 }
2654 }
2655
2656 if (!MemOpChains.empty())
2657 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2658
2659 // Build a sequence of copy-to-reg nodes chained together with token chain
2660 // and flag operands which copy the outgoing args into the appropriate regs.
2661 SDValue InGlue;
2662 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2663 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2664 RegsToPass[i].second, InGlue);
2665 InGlue = Chain.getValue(1);
2666 }
2667
2668 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2669 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2670 // node so that legalize doesn't hack it.
2671 bool isDirect = false;
2672
2673 const TargetMachine &TM = getTargetMachine();
2674 const GlobalValue *GVal = nullptr;
2675 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2676 GVal = G->getGlobal();
2677 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2678
2679 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2680 bool isLocalARMFunc = false;
2681 auto PtrVt = getPointerTy(DAG.getDataLayout());
2682
2683 if (Subtarget->genLongCalls()) {
2684 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2685 "long-calls codegen is not position independent!");
2686 // Handle a global address or an external symbol. If it's not one of
2687 // those, the target's already in a register, so we don't need to do
2688 // anything extra.
2689 if (isa<GlobalAddressSDNode>(Callee)) {
2690 if (Subtarget->genExecuteOnly()) {
2691 if (Subtarget->useMovt())
2692 ++NumMovwMovt;
2693 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2694 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2695 } else {
2696 // Create a constant pool entry for the callee address
2697 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2698 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2699 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2700
2701 // Get the address of the callee into a register
2702 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2703 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2704 Callee = DAG.getLoad(
2705 PtrVt, dl, DAG.getEntryNode(), Addr,
2706 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2707 }
2708 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2709 const char *Sym = S->getSymbol();
2710
2711 if (Subtarget->genExecuteOnly()) {
2712 if (Subtarget->useMovt())
2713 ++NumMovwMovt;
2714 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2715 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2716 } else {
2717 // Create a constant pool entry for the callee address
2718 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2719 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2720 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2721
2722 // Get the address of the callee into a register
2723 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2724 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2725 Callee = DAG.getLoad(
2726 PtrVt, dl, DAG.getEntryNode(), Addr,
2727 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2728 }
2729 }
2730 } else if (isa<GlobalAddressSDNode>(Callee)) {
2731 if (!PreferIndirect) {
2732 isDirect = true;
2733 bool isDef = GVal->isStrongDefinitionForLinker();
2734
2735 // ARM call to a local ARM function is predicable.
2736 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2737 // tBX takes a register source operand.
2738 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2739 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2740 Callee = DAG.getNode(
2741 ARMISD::WrapperPIC, dl, PtrVt,
2742 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2743 Callee = DAG.getLoad(
2744 PtrVt, dl, DAG.getEntryNode(), Callee,
2748 } else if (Subtarget->isTargetCOFF()) {
2749 assert(Subtarget->isTargetWindows() &&
2750 "Windows is the only supported COFF target");
2751 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2752 if (GVal->hasDLLImportStorageClass())
2753 TargetFlags = ARMII::MO_DLLIMPORT;
2754 else if (!TM.shouldAssumeDSOLocal(GVal))
2755 TargetFlags = ARMII::MO_COFFSTUB;
2756 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2757 TargetFlags);
2758 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2759 Callee =
2760 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2761 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2762 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2763 } else {
2764 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2765 }
2766 }
2767 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2768 isDirect = true;
2769 // tBX takes a register source operand.
2770 const char *Sym = S->getSymbol();
2771 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2772 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2773 ARMConstantPoolValue *CPV =
2774 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2775 ARMPCLabelIndex, 4);
2776 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2777 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2778 Callee = DAG.getLoad(
2779 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2780 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2781 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2782 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2783 } else {
2784 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2785 }
2786 }
2787
2788 if (isCmseNSCall) {
2789 assert(!isARMFunc && !isDirect &&
2790 "Cannot handle call to ARM function or direct call");
2791 if (NumBytes > 0) {
2792 DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2793 "call to non-secure function would "
2794 "require passing arguments on stack",
2795 dl.getDebugLoc());
2796 DAG.getContext()->diagnose(Diag);
2797 }
2798 if (isStructRet) {
2799 DiagnosticInfoUnsupported Diag(
2800 DAG.getMachineFunction().getFunction(),
2801 "call to non-secure function would return value through pointer",
2802 dl.getDebugLoc());
2803 DAG.getContext()->diagnose(Diag);
2804 }
2805 }
2806
2807 // FIXME: handle tail calls differently.
2808 unsigned CallOpc;
2809 if (Subtarget->isThumb()) {
2810 if (GuardWithBTI)
2811 CallOpc = ARMISD::t2CALL_BTI;
2812 else if (isCmseNSCall)
2813 CallOpc = ARMISD::tSECALL;
2814 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2815 CallOpc = ARMISD::CALL_NOLINK;
2816 else
2817 CallOpc = ARMISD::CALL;
2818 } else {
2819 if (!isDirect && !Subtarget->hasV5TOps())
2820 CallOpc = ARMISD::CALL_NOLINK;
2821 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2822 // Emit regular call when code size is the priority
2823 !Subtarget->hasMinSize())
2824 // "mov lr, pc; b _foo" to avoid confusing the RSP
2825 CallOpc = ARMISD::CALL_NOLINK;
2826 else
2827 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2828 }
2829
2830 // We don't usually want to end the call-sequence here because we would tidy
2831 // the frame up *after* the call, however in the ABI-changing tail-call case
2832 // we've carefully laid out the parameters so that when sp is reset they'll be
2833 // in the correct location.
2834 if (isTailCall && !isSibCall) {
2835 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2836 InGlue = Chain.getValue(1);
2837 }
2838
2839 std::vector<SDValue> Ops;
2840 Ops.push_back(Chain);
2841 Ops.push_back(Callee);
2842
2843 if (isTailCall) {
2844 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2845 }
2846
2847 // Add argument registers to the end of the list so that they are known live
2848 // into the call.
2849 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2850 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2851 RegsToPass[i].second.getValueType()));
2852
2853 // Add a register mask operand representing the call-preserved registers.
2854 const uint32_t *Mask;
2855 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2856 if (isThisReturn) {
2857 // For 'this' returns, use the R0-preserving mask if applicable
2858 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2859 if (!Mask) {
2860 // Set isThisReturn to false if the calling convention is not one that
2861 // allows 'returned' to be modeled in this way, so LowerCallResult does
2862 // not try to pass 'this' straight through
2863 isThisReturn = false;
2864 Mask = ARI->getCallPreservedMask(MF, CallConv);
2865 }
2866 } else
2867 Mask = ARI->getCallPreservedMask(MF, CallConv);
2868
2869 assert(Mask && "Missing call preserved mask for calling convention");
2870 Ops.push_back(DAG.getRegisterMask(Mask));
2871
2872 if (InGlue.getNode())
2873 Ops.push_back(InGlue);
2874
2875 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2876 if (isTailCall) {
2877 MF.getFrameInfo().setHasTailCall();
2878 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2879 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2880 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2881 return Ret;
2882 }
2883
2884 // Returns a chain and a flag for retval copy to use.
2885 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2886 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2887 InGlue = Chain.getValue(1);
2888 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2889
2890 // If we're guaranteeing tail-calls will be honoured, the callee must
2891 // pop its own argument stack on return. But this call is *not* a tail call so
2892 // we need to undo that after it returns to restore the status-quo.
2893 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2894 uint64_t CalleePopBytes =
2895 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2896
2897 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2898 if (!Ins.empty())
2899 InGlue = Chain.getValue(1);
2900
2901 // Handle result values, copying them out of physregs into vregs that we
2902 // return.
2903 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2904 InVals, isThisReturn,
2905 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
2906}
2907
2908/// HandleByVal - Every parameter *after* a byval parameter is passed
2909/// on the stack. Remember the next parameter register to allocate,
2910 /// and then confiscate the rest of the parameter registers to ensure
2911/// this.
2912void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2913 Align Alignment) const {
2914 // Byval (as with any stack) slots are always at least 4 byte aligned.
2915 Alignment = std::max(Alignment, Align(4));
2916
2917 unsigned Reg = State->AllocateReg(GPRArgRegs);
2918 if (!Reg)
2919 return;
2920
2921 unsigned AlignInRegs = Alignment.value() / 4;
2922 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2923 for (unsigned i = 0; i < Waste; ++i)
2924 Reg = State->AllocateReg(GPRArgRegs);
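// Illustrative example (not from the original source): an 8-byte-aligned
// byval arriving when the next free register is R1 gives AlignInRegs == 2
// and Waste == (R4 - R1) % 2 == 1, so R1 is skipped and the byval starts
// in R2.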
2925
2926 if (!Reg)
2927 return;
2928
2929 unsigned Excess = 4 * (ARM::R4 - Reg);
2930
2931 // Special case: NSAA != SP and the parameter size is greater than the size
2932 // of all remaining GPR regs. In that case we can't split the parameter, we
2933 // must send it to the stack. We also must set NCRN to R4, so all remaining
2934 // registers are wasted.
2935 const unsigned NSAAOffset = State->getStackSize();
2936 if (NSAAOffset != 0 && Size > Excess) {
2937 while (State->AllocateReg(GPRArgRegs))
2938 ;
2939 return;
2940 }
2941
2942 // The first register for the byval parameter is the first register that
2943 // wasn't allocated before this method call, so it would be "reg".
2944 // If the parameter is small enough to fit in the range [reg, r4), then
2945 // the end (one past the last) register would be reg + param-size-in-regs;
2946 // otherwise the parameter is split between registers and the stack, and
2947 // the end register would be r4 in that case.
2948 unsigned ByValRegBegin = Reg;
2949 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2950 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2951 // Note, the first register was already allocated at the beginning of this
2952 // function; allocate the remaining registers we need.
2953 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2954 State->AllocateReg(GPRArgRegs);
2955 // A byval parameter that is split between registers and memory needs its
2956 // size truncated here.
2957 // In the case where the entire structure fits in registers, we set the
2958 // size in memory to zero.
2959 Size = std::max<int>(Size - Excess, 0);
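// Worked example (illustrative, not from the original source): a 12-byte,
// 4-byte-aligned byval whose first allocated register is R1 gets
// ByValRegBegin == R1, ByValRegEnd == min(R1 + 3, R4) == R4 and
// Excess == 4 * (R4 - R1) == 12, so the whole aggregate travels in R1-R3
// and its remaining in-memory Size becomes 0.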
2960}
2961
2962/// MatchingStackOffset - Return true if the given stack call argument is
2963/// already available in the same position (relatively) of the caller's
2964/// incoming argument stack.
2965 static
2966 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2967 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2968 const TargetInstrInfo *TII) {
2969 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2970 int FI = std::numeric_limits<int>::max();
2971 if (Arg.getOpcode() == ISD::CopyFromReg) {
2972 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2973 if (!VR.isVirtual())
2974 return false;
2975 MachineInstr *Def = MRI->getVRegDef(VR);
2976 if (!Def)
2977 return false;
2978 if (!Flags.isByVal()) {
2979 if (!TII->isLoadFromStackSlot(*Def, FI))
2980 return false;
2981 } else {
2982 return false;
2983 }
2984 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2985 if (Flags.isByVal())
2986 // ByVal argument is passed in as a pointer but it's now being
2987 // dereferenced. e.g.
2988 // define @foo(%struct.X* %A) {
2989 // tail call @bar(%struct.X* byval %A)
2990 // }
2991 return false;
2992 SDValue Ptr = Ld->getBasePtr();
2993 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2994 if (!FINode)
2995 return false;
2996 FI = FINode->getIndex();
2997 } else
2998 return false;
2999
3000 assert(FI != std::numeric_limits<int>::max());
3001 if (!MFI.isFixedObjectIndex(FI))
3002 return false;
3003 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
3004}
3005
3006/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3007/// for tail call optimization. Targets which want to do tail call
3008/// optimization should implement this function. Note that this function also
3009/// processes musttail calls, so when this function returns false on a valid
3010/// musttail call, a fatal backend error occurs.
3011 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
3012 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
3013 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
3014 CallingConv::ID CalleeCC = CLI.CallConv;
3015 SDValue Callee = CLI.Callee;
3016 bool isVarArg = CLI.IsVarArg;
3017 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3018 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3019 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3020 const SelectionDAG &DAG = CLI.DAG;
3021 MachineFunction &MF = DAG.getMachineFunction();
3022 const Function &CallerF = MF.getFunction();
3023 CallingConv::ID CallerCC = CallerF.getCallingConv();
3024
3025 assert(Subtarget->supportsTailCall());
3026
3027 // Indirect tail calls cannot be optimized for Thumb1 if the args
3028 // to the call take up r0-r3. The reason is that there are no legal registers
3029 // left to hold the pointer to the function to be called.
3030 // Similarly, if the function uses return address sign and authentication,
3031 // r12 is needed to hold the PAC and is not available to hold the callee
3032 // address.
3033 if (Outs.size() >= 4 &&
3034 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
3035 if (Subtarget->isThumb1Only())
3036 return false;
3037 // Conservatively assume the function spills LR.
3038 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
3039 return false;
3040 }
3041
3042 // Look for obvious safe cases to perform tail call optimization that do not
3043 // require ABI changes. This is what gcc calls sibcall.
3044
3045 // Exception-handling functions need a special set of instructions to indicate
3046 // a return to the hardware. Tail-calling another function would probably
3047 // break this.
3048 if (CallerF.hasFnAttribute("interrupt"))
3049 return false;
3050
3051 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3052 return CalleeCC == CallerCC;
3053
3054 // Also avoid sibcall optimization if either caller or callee uses struct
3055 // return semantics.
3056 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3057 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3058 if (isCalleeStructRet || isCallerStructRet)
3059 return false;
3060
3061 // Externally-defined functions with weak linkage should not be
3062 // tail-called on ARM when the OS does not support dynamic
3063 // pre-emption of symbols, as the AAELF spec requires normal calls
3064 // to undefined weak functions to be replaced with a NOP or jump to the
3065 // next instruction. The behaviour of branch instructions in this
3066 // situation (as used for tail calls) is implementation-defined, so we
3067 // cannot rely on the linker replacing the tail call with a return.
3068 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3069 const GlobalValue *GV = G->getGlobal();
3070 const Triple &TT = getTargetMachine().getTargetTriple();
3071 if (GV->hasExternalWeakLinkage() &&
3072 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3073 return false;
3074 }
3075
3076 // Check that the call results are passed in the same way.
3077 LLVMContext &C = *DAG.getContext();
3078 if (!CCState::resultsCompatible(
3079 getEffectiveCallingConv(CalleeCC, isVarArg),
3080 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3081 CCAssignFnForReturn(CalleeCC, isVarArg),
3082 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3083 return false;
3084 // The callee has to preserve all registers the caller needs to preserve.
3085 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3086 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3087 if (CalleeCC != CallerCC) {
3088 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3089 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3090 return false;
3091 }
3092
3093 // If Caller's vararg or byval argument has been split between registers and
3094 // stack, do not perform tail call, since part of the argument is in caller's
3095 // local frame.
3096 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3097 if (AFI_Caller->getArgRegsSaveSize())
3098 return false;
3099
3100 // If the callee takes no arguments then go on to check the results of the
3101 // call.
3102 if (!Outs.empty()) {
3103 if (CCInfo.getStackSize()) {
3104 // Check if the arguments are already laid out in the right way as
3105 // the caller's fixed stack objects.
3106 MachineFrameInfo &MFI = MF.getFrameInfo();
3107 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3108 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3109 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3110 i != e;
3111 ++i, ++realArgIdx) {
3112 CCValAssign &VA = ArgLocs[i];
3113 EVT RegVT = VA.getLocVT();
3114 SDValue Arg = OutVals[realArgIdx];
3115 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3116 if (VA.getLocInfo() == CCValAssign::Indirect)
3117 return false;
3118 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3119 // f64 and vector types are split into multiple registers or
3120 // register/stack-slot combinations. The types will not match
3121 // the registers; give up on memory f64 refs until we figure
3122 // out what to do about this.
3123 if (!VA.isRegLoc())
3124 return false;
3125 if (!ArgLocs[++i].isRegLoc())
3126 return false;
3127 if (RegVT == MVT::v2f64) {
3128 if (!ArgLocs[++i].isRegLoc())
3129 return false;
3130 if (!ArgLocs[++i].isRegLoc())
3131 return false;
3132 }
3133 } else if (!VA.isRegLoc()) {
3134 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3135 MFI, MRI, TII))
3136 return false;
3137 }
3138 }
3139 }
3140
3141 const MachineRegisterInfo &MRI = MF.getRegInfo();
3142 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3143 return false;
3144 }
3145
3146 return true;
3147}
3148
3149bool
3150ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3151 MachineFunction &MF, bool isVarArg,
3152 const SmallVectorImpl<ISD::OutputArg> &Outs,
3153 LLVMContext &Context) const {
3154 SmallVector<CCValAssign, 16> RVLocs;
3155 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3156 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3157}
3158
3159 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3160 const SDLoc &DL, SelectionDAG &DAG) {
3161 const MachineFunction &MF = DAG.getMachineFunction();
3162 const Function &F = MF.getFunction();
3163
3164 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3165
3166 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3167 // version of the "preferred return address". These offsets affect the return
3168 // instruction if this is a return from PL1 without hypervisor extensions.
3169 // IRQ/FIQ: +4 "subs pc, lr, #4"
3170 // SWI: 0 "subs pc, lr, #0"
3171 // ABORT: +4 "subs pc, lr, #4"
3172 // UNDEF: +4/+2 "subs pc, lr, #0"
3173 // UNDEF varies depending on where the exception came from ARM or Thumb
3174 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3175
3176 int64_t LROffset;
3177 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3178 IntKind == "ABORT")
3179 LROffset = 4;
3180 else if (IntKind == "SWI" || IntKind == "UNDEF")
3181 LROffset = 0;
3182 else
3183 report_fatal_error("Unsupported interrupt attribute. If present, value "
3184 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3185
3186 RetOps.insert(RetOps.begin() + 1,
3187 DAG.getConstant(LROffset, DL, MVT::i32, false));
3188
3189 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3190}
3191
3192SDValue
3193ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3194 bool isVarArg,
3196 const SmallVectorImpl<SDValue> &OutVals,
3197 const SDLoc &dl, SelectionDAG &DAG) const {
3198 // CCValAssign - represent the assignment of the return value to a location.
3199 SmallVector<CCValAssign, 16> RVLocs;
3200
3201 // CCState - Info about the registers and stack slots.
3202 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3203 *DAG.getContext());
3204
3205 // Analyze outgoing return values.
3206 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3207
3208 SDValue Glue;
3209 SmallVector<SDValue, 4> RetOps;
3210 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3211 bool isLittleEndian = Subtarget->isLittle();
3212
3213 MachineFunction &MF = DAG.getMachineFunction();
3214 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3215 AFI->setReturnRegsCount(RVLocs.size());
3216
3217 // Report error if cmse entry function returns structure through first ptr arg.
3218 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3219 // Note: using an empty SDLoc(), as the first line of the function is a
3220 // better place to report than the last line.
3221 DiagnosticInfoUnsupported Diag(
3222 DAG.getMachineFunction().getFunction(),
3223 "secure entry function would return value through pointer",
3224 SDLoc().getDebugLoc());
3225 DAG.getContext()->diagnose(Diag);
3226 }
3227
3228 // Copy the result values into the output registers.
3229 for (unsigned i = 0, realRVLocIdx = 0;
3230 i != RVLocs.size();
3231 ++i, ++realRVLocIdx) {
3232 CCValAssign &VA = RVLocs[i];
3233 assert(VA.isRegLoc() && "Can only return in registers!");
3234
3235 SDValue Arg = OutVals[realRVLocIdx];
3236 bool ReturnF16 = false;
3237
3238 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3239 // Half-precision return values can be returned like this:
3240 //
3241 // t11 f16 = fadd ...
3242 // t12: i16 = bitcast t11
3243 // t13: i32 = zero_extend t12
3244 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3245 //
3246 // to avoid code generation for bitcasts, we simply set Arg to the node
3247 // that produces the f16 value, t11 in this case.
3248 //
3249 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3250 SDValue ZE = Arg.getOperand(0);
3251 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3252 SDValue BC = ZE.getOperand(0);
3253 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3254 Arg = BC.getOperand(0);
3255 ReturnF16 = true;
3256 }
3257 }
3258 }
3259 }
3260
3261 switch (VA.getLocInfo()) {
3262 default: llvm_unreachable("Unknown loc info!");
3263 case CCValAssign::Full: break;
3264 case CCValAssign::BCvt:
3265 if (!ReturnF16)
3266 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3267 break;
3268 }
3269
3270 // Mask f16 arguments if this is a CMSE nonsecure entry.
3271 auto RetVT = Outs[realRVLocIdx].ArgVT;
3272 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3273 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3274 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3275 } else {
3276 auto LocBits = VA.getLocVT().getSizeInBits();
3277 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3278 SDValue Mask =
3279 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3280 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3281 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3282 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3283 }
3284 }
3285
3286 if (VA.needsCustom() &&
3287 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3288 if (VA.getLocVT() == MVT::v2f64) {
3289 // Extract the first half and return it in two registers.
3290 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3291 DAG.getConstant(0, dl, MVT::i32));
3292 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3293 DAG.getVTList(MVT::i32, MVT::i32), Half);
3294
3295 Chain =
3296 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3297 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3298 Glue = Chain.getValue(1);
3299 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3300 VA = RVLocs[++i]; // skip ahead to next loc
3301 Chain =
3302 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3303 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3304 Glue = Chain.getValue(1);
3305 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3306 VA = RVLocs[++i]; // skip ahead to next loc
3307
3308 // Extract the 2nd half and fall through to handle it as an f64 value.
3309 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3310 DAG.getConstant(1, dl, MVT::i32));
3311 }
3312 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3313 // available.
3314 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3315 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3316 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3317 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3318 Glue = Chain.getValue(1);
3319 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3320 VA = RVLocs[++i]; // skip ahead to next loc
3321 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3322 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3323 } else
3324 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3325
3326 // Guarantee that all emitted copies are
3327 // stuck together, avoiding something bad.
3328 Glue = Chain.getValue(1);
3329 RetOps.push_back(DAG.getRegister(
3330 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3331 }
3332 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3333 const MCPhysReg *I =
3334 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3335 if (I) {
3336 for (; *I; ++I) {
3337 if (ARM::GPRRegClass.contains(*I))
3338 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3339 else if (ARM::DPRRegClass.contains(*I))
3340 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3341 else
3342 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3343 }
3344 }
3345
3346 // Update chain and glue.
3347 RetOps[0] = Chain;
3348 if (Glue.getNode())
3349 RetOps.push_back(Glue);
3350
3351 // CPUs which aren't M-class use a special sequence to return from
3352 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3353 // though we use "subs pc, lr, #N").
3354 //
3355 // M-class CPUs actually use a normal return sequence with a special
3356 // (hardware-provided) value in LR, so the normal code path works.
3357 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3358 !Subtarget->isMClass()) {
3359 if (Subtarget->isThumb1Only())
3360 report_fatal_error("interrupt attribute is not supported in Thumb1");
3361 return LowerInterruptReturn(RetOps, dl, DAG);
3362 }
3363
3364 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE
3365 : ARMISD::RET_GLUE;
3366 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3367}
3368
3369bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3370 if (N->getNumValues() != 1)
3371 return false;
3372 if (!N->hasNUsesOfValue(1, 0))
3373 return false;
3374
3375 SDValue TCChain = Chain;
3376 SDNode *Copy = *N->use_begin();
3377 if (Copy->getOpcode() == ISD::CopyToReg) {
3378 // If the copy has a glue operand, we conservatively assume it isn't safe to
3379 // perform a tail call.
3380 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3381 return false;
3382 TCChain = Copy->getOperand(0);
3383 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3384 SDNode *VMov = Copy;
3385 // f64 returned in a pair of GPRs.
3386 SmallPtrSet<SDNode*, 2> Copies;
3387 for (SDNode *U : VMov->uses()) {
3388 if (U->getOpcode() != ISD::CopyToReg)
3389 return false;
3390 Copies.insert(U);
3391 }
3392 if (Copies.size() > 2)
3393 return false;
3394
3395 for (SDNode *U : VMov->uses()) {
3396 SDValue UseChain = U->getOperand(0);
3397 if (Copies.count(UseChain.getNode()))
3398 // Second CopyToReg
3399 Copy = U;
3400 else {
3401 // We are at the top of this chain.
3402 // If the copy has a glue operand, we conservatively assume it
3403 // isn't safe to perform a tail call.
3404 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3405 return false;
3406 // First CopyToReg
3407 TCChain = UseChain;
3408 }
3409 }
3410 } else if (Copy->getOpcode() == ISD::BITCAST) {
3411 // f32 returned in a single GPR.
3412 if (!Copy->hasOneUse())
3413 return false;
3414 Copy = *Copy->use_begin();
3415 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3416 return false;
3417 // If the copy has a glue operand, we conservatively assume it isn't safe to
3418 // perform a tail call.
3419 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3420 return false;
3421 TCChain = Copy->getOperand(0);
3422 } else {
3423 return false;
3424 }
3425
3426 bool HasRet = false;
3427 for (const SDNode *U : Copy->uses()) {
3428 if (U->getOpcode() != ARMISD::RET_GLUE &&
3429 U->getOpcode() != ARMISD::INTRET_GLUE)
3430 return false;
3431 HasRet = true;
3432 }
3433
3434 if (!HasRet)
3435 return false;
3436
3437 Chain = TCChain;
3438 return true;
3439}
3440
3441bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3442 if (!Subtarget->supportsTailCall())
3443 return false;
3444
3445 if (!CI->isTailCall())
3446 return false;
3447
3448 return true;
3449}
3450
3451 // Writing a 64-bit value, so we need to split it into two 32-bit values
3452 // first and pass the low and high parts through.
3453 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3454 SDLoc DL(Op);
3455 SDValue WriteValue = Op->getOperand(2);
3456
3457 // This function is only supposed to be called for i64 type argument.
3458 assert(WriteValue.getValueType() == MVT::i64
3459 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3460
3461 SDValue Lo, Hi;
3462 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3463 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3464 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3465}
3466
3467// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3468// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3469// one of the above mentioned nodes. It has to be wrapped because otherwise
3470// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3471// be used to form addressing mode. These wrapped nodes will be selected
3472// into MOVi.
3473SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3474 SelectionDAG &DAG) const {
3475 EVT PtrVT = Op.getValueType();
3476 // FIXME there is no actual debug info here
3477 SDLoc dl(Op);
3478 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3479 SDValue Res;
3480
3481 // When generating execute-only code Constant Pools must be promoted to the
3482 // global data section. It's a bit ugly that we can't share them across basic
3483 // blocks, but this way we guarantee that execute-only behaves correctly with
3484 // position-independent addressing modes.
3485 if (Subtarget->genExecuteOnly()) {
3486 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3487 auto T = const_cast<Type*>(CP->getType());
3488 auto C = const_cast<Constant*>(CP->getConstVal());
3489 auto M = const_cast<Module*>(DAG.getMachineFunction().
3490 getFunction().getParent());
3491 auto GV = new GlobalVariable(
3492 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3495 Twine(AFI->createPICLabelUId())
3496 );
3497 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3498 dl, PtrVT);
3499 return LowerGlobalAddress(GA, DAG);
3500 }
3501
3502 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3503 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3504 Align CPAlign = CP->getAlign();
3505 if (Subtarget->isThumb1Only())
3506 CPAlign = std::max(CPAlign, Align(4));
3507 if (CP->isMachineConstantPoolEntry())
3508 Res =
3509 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3510 else
3511 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3512 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3513}
3514
3515 unsigned ARMTargetLowering::getJumpTableEncoding() const {
3516 // If we don't have a 32-bit pc-relative branch instruction then the jump
3517 // table consists of block addresses. Usually this is inline, but for
3518 // execute-only it must be placed out-of-line.
3519 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3520 return MachineJumpTableInfo::EK_BlockAddress;
3521 return MachineJumpTableInfo::EK_Inline;
3522 }
3523
3524SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3525 SelectionDAG &DAG) const {
3526 MachineFunction &MF = DAG.getMachineFunction();
3527 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3528 unsigned ARMPCLabelIndex = 0;
3529 SDLoc DL(Op);
3530 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3531 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3532 SDValue CPAddr;
3533 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3534 if (!IsPositionIndependent) {
3535 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3536 } else {
3537 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3538 ARMPCLabelIndex = AFI->createPICLabelUId();
3539 ARMConstantPoolValue *CPV =
3540 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3541 ARMCP::CPBlockAddress, PCAdj);
3542 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3543 }
3544 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3545 SDValue Result = DAG.getLoad(
3546 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3547 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3548 if (!IsPositionIndependent)
3549 return Result;
3550 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3551 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3552}
3553
3554/// Convert a TLS address reference into the correct sequence of loads
3555/// and calls to compute the variable's address for Darwin, and return an
3556/// SDValue containing the final node.
3557
3558/// Darwin only has one TLS scheme which must be capable of dealing with the
3559/// fully general situation, in the worst case. This means:
3560/// + "extern __thread" declaration.
3561/// + Defined in a possibly unknown dynamic library.
3562///
3563/// The general system is that each __thread variable has a [3 x i32] descriptor
3564/// which contains information used by the runtime to calculate the address. The
3565/// only part of this the compiler needs to know about is the first word, which
3566/// contains a function pointer that must be called with the address of the
3567/// entire descriptor in "r0".
3568///
3569/// Since this descriptor may be in a different unit, in general access must
3570/// proceed along the usual ARM rules. A common sequence to produce is:
3571///
3572/// movw rT1, :lower16:_var$non_lazy_ptr
3573/// movt rT1, :upper16:_var$non_lazy_ptr
3574/// ldr r0, [rT1]
3575/// ldr rT2, [r0]
3576/// blx rT2
3577/// [...address now in r0...]
3578SDValue
3579ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3580 SelectionDAG &DAG) const {
3581 assert(Subtarget->isTargetDarwin() &&
3582 "This function expects a Darwin target");
3583 SDLoc DL(Op);
3584
3585 // First step is to get the address of the actual global symbol. This is where
3586 // the TLS descriptor lives.
3587 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3588
3589 // The first entry in the descriptor is a function pointer that we must call
3590 // to obtain the address of the variable.
3591 SDValue Chain = DAG.getEntryNode();
3592 SDValue FuncTLVGet = DAG.getLoad(
3593 MVT::i32, DL, Chain, DescAddr,
3597 Chain = FuncTLVGet.getValue(1);
3598
3599 MachineFunction &F = DAG.getMachineFunction();
3600 MachineFrameInfo &MFI = F.getFrameInfo();
3601 MFI.setAdjustsStack(true);
3602
3603 // TLS calls preserve all registers except those that absolutely must be
3604 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3605 // silly).
3606 auto TRI =
3607 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3608 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3609 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3610
3611 // Finally, we can make the call. This is just a degenerate version of a
3612 // normal ARM call node: r0 takes the address of the descriptor, and
3613 // returns the address of the variable in this thread.
3614 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3615 Chain =
3616 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3617 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3618 DAG.getRegisterMask(Mask), Chain.getValue(1));
3619 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3620}
3621
3622SDValue
3623ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3624 SelectionDAG &DAG) const {
3625 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3626
3627 SDValue Chain = DAG.getEntryNode();
3628 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3629 SDLoc DL(Op);
3630
3631 // Load the current TEB (thread environment block)
3632 SDValue Ops[] = {Chain,
3633 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3634 DAG.getTargetConstant(15, DL, MVT::i32),
3635 DAG.getTargetConstant(0, DL, MVT::i32),
3636 DAG.getTargetConstant(13, DL, MVT::i32),
3637 DAG.getTargetConstant(0, DL, MVT::i32),
3638 DAG.getTargetConstant(2, DL, MVT::i32)};
3639 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3640 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3641
3642 SDValue TEB = CurrentTEB.getValue(0);
3643 Chain = CurrentTEB.getValue(1);
3644
3645 // Load the ThreadLocalStoragePointer from the TEB
3646 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3647 SDValue TLSArray =
3648 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3649 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3650
3651 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3652 // offset into the TLSArray.
3653
3654 // Load the TLS index from the C runtime
3655 SDValue TLSIndex =
3656 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3657 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3658 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3659
3660 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3661 DAG.getConstant(2, DL, MVT::i32));
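// Slot is TLSIndex * 4, i.e. the byte offset of this module's entry in the
// TLS array loaded above.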
3662 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3663 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3664 MachinePointerInfo());
3665
3666 // Get the offset of the start of the .tls section (section base)
3667 const auto *GA = cast<GlobalAddressSDNode>(Op);
3668 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3669 SDValue Offset = DAG.getLoad(
3670 PtrVT, DL, Chain,
3671 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3672 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3673 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3674
3675 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3676}
3677
3678// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3679SDValue
3680ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3681 SelectionDAG &DAG) const {
3682 SDLoc dl(GA);
3683 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3684 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3685 MachineFunction &MF = DAG.getMachineFunction();
3686 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3687 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3688 ARMConstantPoolValue *CPV =
3689 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3690 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3691 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3692 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3693 Argument = DAG.getLoad(
3694 PtrVT, dl, DAG.getEntryNode(), Argument,
3695 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3696 SDValue Chain = Argument.getValue(1);
3697
3698 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3699 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3700
3701 // call __tls_get_addr.
3702 ArgListTy Args;
3703 ArgListEntry Entry;
3704 Entry.Node = Argument;
3705 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3706 Args.push_back(Entry);
3707
3708 // FIXME: is there useful debug info available here?
3709 TargetLowering::CallLoweringInfo CLI(DAG);
3710 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3711 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3712 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3713
3714 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3715 return CallResult.first;
3716}
3717
3718// Lower ISD::GlobalTLSAddress using the "initial exec" or
3719// "local exec" model.
3720SDValue
3721ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3722 SelectionDAG &DAG,
3723 TLSModel::Model model) const {
3724 const GlobalValue *GV = GA->getGlobal();
3725 SDLoc dl(GA);
3726 SDValue Offset;
3727 SDValue Chain = DAG.getEntryNode();
3728 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3729 // Get the Thread Pointer
3730 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3731
3732 if (model == TLSModel::InitialExec) {
3733 MachineFunction &MF = DAG.getMachineFunction();
3734 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3735 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3736 // Initial exec model.
3737 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3738 ARMConstantPoolValue *CPV =
3739 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3740 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3741 true);
3742 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3743 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3744 Offset = DAG.getLoad(
3745 PtrVT, dl, Chain, Offset,
3746 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3747 Chain = Offset.getValue(1);
3748
3749 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3750 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3751
3752 Offset = DAG.getLoad(
3753 PtrVT, dl, Chain, Offset,
3755 } else {
3756 // local exec model
3757 assert(model == TLSModel::LocalExec);
3758 ARMConstantPoolValue *CPV =
3759 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3760 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3761 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3762 Offset = DAG.getLoad(
3763 PtrVT, dl, Chain, Offset,
3764 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3765 }
3766
3767 // The address of the thread local variable is the add of the thread
3768 // pointer with the offset of the variable.
3769 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3770}
3771
3772SDValue
3773ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3774 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3775 if (DAG.getTarget().useEmulatedTLS())
3776 return LowerToTLSEmulatedModel(GA, DAG);
3777
3778 if (Subtarget->isTargetDarwin())
3779 return LowerGlobalTLSAddressDarwin(Op, DAG);
3780
3781 if (Subtarget->isTargetWindows())
3782 return LowerGlobalTLSAddressWindows(Op, DAG);
3783
3784 // TODO: implement the "local dynamic" model
3785 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3786 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3787
3788 switch (model) {
3789 case TLSModel::GeneralDynamic:
3790 case TLSModel::LocalDynamic:
3791 return LowerToTLSGeneralDynamicModel(GA, DAG);
3792 case TLSModel::InitialExec:
3793 case TLSModel::LocalExec:
3794 return LowerToTLSExecModels(GA, DAG, model);
3795 }
3796 llvm_unreachable("bogus TLS model");
3797}
3798
3799/// Return true if all users of V are within function F, looking through
3800/// ConstantExprs.
3801static bool allUsersAreInFunction(const Value *V, const Function *F) {
3802 SmallVector<const User*,4> Worklist(V->users());
3803 while (!Worklist.empty()) {
3804 auto *U = Worklist.pop_back_val();
3805 if (isa<ConstantExpr>(U)) {
3806 append_range(Worklist, U->users());
3807 continue;
3808 }
3809
3810 auto *I = dyn_cast<Instruction>(U);
3811 if (!I || I->getParent()->getParent() != F)
3812 return false;
3813 }
3814 return true;
3815}
3816
3817 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3818 const GlobalValue *GV, SelectionDAG &DAG,
3819 EVT PtrVT, const SDLoc &dl) {
3820 // If we're creating a pool entry for a constant global with unnamed address,
3821 // and the global is small enough, we can emit it inline into the constant pool
3822 // to save ourselves an indirection.
3823 //
3824 // This is a win if the constant is only used in one function (so it doesn't
3825 // need to be duplicated) or duplicating the constant wouldn't increase code
3826 // size (implying the constant is no larger than 4 bytes).
3827 const Function &F = DAG.getMachineFunction().getFunction();
3828
3829 // We rely on this decision to inline being idempotent and unrelated to the
3830 // use-site. We know that if we inline a variable at one use site, we'll
3831 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3832 // doesn't know about this optimization, so bail out if it's enabled else
3833 // we could decide to inline here (and thus never emit the GV) but require
3834 // the GV from fast-isel generated code.
3835 if (!EnableConstpoolPromotion ||
3836 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3837 return SDValue();
3838
3839 auto *GVar = dyn_cast<GlobalVariable>(GV);
3840 if (!GVar || !GVar->hasInitializer() ||
3841 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3842 !GVar->hasLocalLinkage())
3843 return SDValue();
3844
3845 // If we inline a value that contains relocations, we move the relocations
3846 // from .data to .text. This is not allowed in position-independent code.
3847 auto *Init = GVar->getInitializer();
3848 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3849 Init->needsDynamicRelocation())
3850 return SDValue();
3851
3852 // The constant islands pass can only really deal with alignment requests
3853 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3854 // any type requiring alignment greater than 4 bytes. We also can only
3855 // promote constants that are multiples of 4 bytes in size, or can be
3856 // padded to a multiple of 4. Currently we only try to pad constants that
3857 // are strings, for simplicity.
3858 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3859 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3860 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3861 unsigned RequiredPadding = 4 - (Size % 4);
3862 bool PaddingPossible =
3863 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3864 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3865 Size == 0)
3866 return SDValue();
3867
3868 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3869 MachineFunction &MF = DAG.getMachineFunction();
3870 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3871
3872 // We can't bloat the constant pool too much, else the ConstantIslands pass
3873 // may fail to converge. If we haven't promoted this global yet (it may have
3874 // multiple uses), and promoting it would increase the constant pool size (Sz
3875 // > 4), ensure we have space to do so up to MaxTotal.
3876 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3877 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3878 ConstpoolPromotionMaxTotal)
3879 return SDValue();
3880
3881 // This is only valid if all users are in a single function; we can't clone
3882 // the constant in general. The LLVM IR unnamed_addr allows merging
3883 // constants, but not cloning them.
3884 //
3885 // We could potentially allow cloning if we could prove all uses of the
3886 // constant in the current function don't care about the address, like
3887 // printf format strings. But that isn't implemented for now.
3888 if (!allUsersAreInFunction(GVar, &F))
3889 return SDValue();
3890
3891 // We're going to inline this global. Pad it out if needed.
3892 if (RequiredPadding != 4) {
3893 StringRef S = CDAInit->getAsString();
3894
3895 SmallVector<uint8_t,16> V(S.size());
3896 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3897 while (RequiredPadding--)
3898 V.push_back(0);
3899 Init = ConstantDataArray::get(*DAG.getContext(), V);
3900 }
3901
3902 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3903 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3904 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3905 AFI->markGlobalAsPromotedToConstantPool(GVar);
3906 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3907 PaddedSize - 4);
3908 }
3909 ++NumConstpoolPromoted;
3910 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3911}
3912
3913 bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3914 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3915 if (!(GV = GA->getAliaseeObject()))
3916 return false;
3917 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3918 return V->isConstant();
3919 return isa<Function>(GV);
3920}
3921
3922SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3923 SelectionDAG &DAG) const {
3924 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3925 default: llvm_unreachable("unknown object format");
3926 case Triple::COFF:
3927 return LowerGlobalAddressWindows(Op, DAG);
3928 case Triple::ELF:
3929 return LowerGlobalAddressELF(Op, DAG);
3930 case Triple::MachO:
3931 return LowerGlobalAddressDarwin(Op, DAG);
3932 }
3933}
3934
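// For ELF, the address of a global is materialized as (in the order tried
// below): a constant-pool entry for promotable dso-local constants, a GOT load
// for non-dso-local PIC references, a pc-relative address for read-only data
// under ROPI, an SB(R9)-relative offset for writable data under RWPI, a
// movw/movt pair when available (or required for execute-only code), and
// otherwise a literal-pool load.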
3935SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3936 SelectionDAG &DAG) const {
3937 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3938 SDLoc dl(Op);
3939 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3940 bool IsRO = isReadOnly(GV);
3941
3942 // promoteToConstantPool only if not generating XO text section
3943 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3944 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3945 return V;
3946
3947 if (isPositionIndependent()) {
3948 SDValue G = DAG.getTargetGlobalAddress(
3949 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3950 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3951 if (!GV->isDSOLocal())
3952 Result =
3953 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3954 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3955 return Result;
3956 } else if (Subtarget->isROPI() && IsRO) {
3957 // PC-relative.
3958 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3959 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3960 return Result;
3961 } else if (Subtarget->isRWPI() && !IsRO) {
3962 // SB-relative.
3963 SDValue RelAddr;
3964 if (Subtarget->useMovt()) {
3965 ++NumMovwMovt;
3966 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3967 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3968 } else { // use literal pool for address constant
3969 ARMConstantPoolValue *CPV =
3970 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3971 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3972 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3973 RelAddr = DAG.getLoad(
3974 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3975 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3976 }
3977 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3978 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3979 return Result;
3980 }
3981
3982 // If we have T2 ops, we can materialize the address directly via a movt/movw
3983 // pair. This is always cheaper. If we need to generate execute-only code and
3984 // only have Thumb1 available, we can't use a constant pool and are forced to
3985 // use immediate relocations.
3986 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3987 if (Subtarget->useMovt())
3988 ++NumMovwMovt;
3989 // FIXME: Once remat is capable of dealing with instructions with register
3990 // operands, expand this into two nodes.
3991 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3992 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3993 } else {
3994 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3995 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3996 return DAG.getLoad(
3997 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3998 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3999 }
4000}
4001
4002SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
4003 SelectionDAG &DAG) const {
4004 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4005 "ROPI/RWPI not currently supported for Darwin");
4006 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4007 SDLoc dl(Op);
4008 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4009
4010 if (Subtarget->useMovt())
4011 ++NumMovwMovt;
4012
4013 // FIXME: Once remat is capable of dealing with instructions with register
4014 // operands, expand this into multiple nodes
4015 unsigned Wrapper =
4016 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
4017
4018 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
4019 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
4020
4021 if (Subtarget->isGVIndirectSymbol(GV))
4022 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4023 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4024 return Result;
4025}
4026
4027SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4028 SelectionDAG &DAG) const {
4029 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4030 assert(Subtarget->useMovt() &&
4031 "Windows on ARM expects to use movw/movt");
4032 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4033 "ROPI/RWPI not currently supported for Windows");
4034
4035 const TargetMachine &TM = getTargetMachine();
4036 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4037 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4038 if (GV->hasDLLImportStorageClass())
4039 TargetFlags = ARMII::MO_DLLIMPORT;
4040 else if (!TM.shouldAssumeDSOLocal(GV))
4041 TargetFlags = ARMII::MO_COFFSTUB;
4042 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4043 SDValue Result;
4044 SDLoc DL(Op);
4045
4046 ++NumMovwMovt;
4047
4048 // FIXME: Once remat is capable of dealing with instructions with register
4049 // operands, expand this into two nodes.
4050 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4051 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4052 TargetFlags));
4053 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4054 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4055 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4056 return Result;
4057}
4058
4059SDValue
4060ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4061 SDLoc dl(Op);
4062 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4063 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4064 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4065 Op.getOperand(1), Val);
4066}
4067
4068SDValue
4069ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4070 SDLoc dl(Op);
4071 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4072 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4073}
4074
4075SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4076 SelectionDAG &DAG) const {
4077 SDLoc dl(Op);
4078 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4079 Op.getOperand(0));
4080}
4081
4082SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4083 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
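// The intrinsic ID is operand 0 unless operand 0 is the chain (MVT::Other), in
// which case it is operand 1; the comparison below selects the right index.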
4084 unsigned IntNo =
4085 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4086 switch (IntNo) {
4087 default:
4088 return SDValue(); // Don't custom lower most intrinsics.
4089 case Intrinsic::arm_gnu_eabi_mcount: {
4090 MachineFunction &MF = DAG.getMachineFunction();
4091 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4092 SDLoc dl(Op);
4093 SDValue Chain = Op.getOperand(0);
4094 // call "\01__gnu_mcount_nc"
4095 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4096 const uint32_t *Mask =
4097 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4098 assert(Mask && "Missing call preserved mask for calling convention");
4099 // Mark LR an implicit live-in.
4100 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4101 SDValue ReturnAddress =
4102 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4103 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4104 SDValue Callee =
4105 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4106 SDValue RegisterMask = DAG.getRegisterMask(Mask);
4107 if (Subtarget->isThumb())
4108 return SDValue(
4109 DAG.getMachineNode(
4110 ARM::tBL_PUSHLR, dl, ResultTys,
4111 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4112 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4113 0);
4114 return SDValue(
4115 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4116 {ReturnAddress, Callee, RegisterMask, Chain}),
4117 0);
4118 }
4119 }
4120}
4121
4122SDValue
4123ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4124 const ARMSubtarget *Subtarget) const {
4125 unsigned IntNo = Op.getConstantOperandVal(0);
4126 SDLoc dl(Op);
4127 switch (IntNo) {
4128 default: return SDValue(); // Don't custom lower most intrinsics.
4129 case Intrinsic::thread_pointer: {
4130 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4131 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4132 }
4133 case Intrinsic::arm_cls: {
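// The expansion below computes cls(x) as ctlz(((x ^ (x >> 31)) << 1) | 1):
// xor-ing x with its sign replicated across the word turns leading sign bits
// into leading zeros, and the "<< 1 | 1" discounts the sign bit itself while
// keeping the ctlz operand non-zero.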
4134 const SDValue &Operand = Op.getOperand(1);
4135 const EVT VTy = Op.getValueType();
4136 SDValue SRA =
4137 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4138 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4139 SDValue SHL =
4140 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4141 SDValue OR =
4142 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4143 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4144 return Result;
4145 }
4146 case Intrinsic::arm_cls64: {
4147 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4148 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4149 const SDValue &Operand = Op.getOperand(1);
4150 const EVT VTy = Op.getValueType();
4151 SDValue Lo, Hi;
4152 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4153 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4154 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4155 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4156 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4157 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4158 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4159 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4160 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4161 SDValue CheckLo =
4162 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4163 SDValue HiIsZero =
4164 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4165 SDValue AdjustedLo =
4166 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4167 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4168 SDValue Result =
4169 DAG.getSelect(dl, VTy, CheckLo,
4170 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4171 return Result;
4172 }
4173 case Intrinsic::eh_sjlj_lsda: {
4174 MachineFunction &MF = DAG.getMachineFunction();
4175 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4176 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4177 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4178 SDValue CPAddr;
4179 bool IsPositionIndependent = isPositionIndependent();
4180 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4181 ARMConstantPoolValue *CPV =
4182 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4183 ARMCP::CPLSDA, PCAdj);
4184 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4185 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4186 SDValue Result = DAG.getLoad(
4187 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4188 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4189
4190 if (IsPositionIndependent) {
4191 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4192 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4193 }
4194 return Result;
4195 }
4196 case Intrinsic::arm_neon_vabs:
4197 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4198 Op.getOperand(1));
4199 case Intrinsic::arm_neon_vabds:
4200 if (Op.getValueType().isInteger())
4201 return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
4202 Op.getOperand(1), Op.getOperand(2));
4203 return SDValue();
4204 case Intrinsic::arm_neon_vabdu:
4205 return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
4206 Op.getOperand(1), Op.getOperand(2));
4207 case Intrinsic::arm_neon_vmulls:
4208 case Intrinsic::arm_neon_vmullu: {
4209 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4210 ? ARMISD::VMULLs : ARMISD::VMULLu;
4211 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4212 Op.getOperand(1), Op.getOperand(2));
4213 }
4214 case Intrinsic::arm_neon_vminnm:
4215 case Intrinsic::arm_neon_vmaxnm: {
4216 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4217 ? ISD::FMINNUM : ISD::FMAXNUM;
4218 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4219 Op.getOperand(1), Op.getOperand(2));
4220 }
4221 case Intrinsic::arm_neon_vminu:
4222 case Intrinsic::arm_neon_vmaxu: {
4223 if (Op.getValueType().isFloatingPoint())
4224 return SDValue();
4225 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4226 ? ISD::UMIN : ISD::UMAX;
4227 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4228 Op.getOperand(1), Op.getOperand(2));
4229 }
4230 case Intrinsic::arm_neon_vmins:
4231 case Intrinsic::arm_neon_vmaxs: {
4232 // v{min,max}s is overloaded between signed integers and floats.
4233 if (!Op.getValueType().isFloatingPoint()) {
4234 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4235 ? ISD::SMIN : ISD::SMAX;
4236 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4237 Op.getOperand(1), Op.getOperand(2));
4238 }
4239 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4240 ? ISD::FMINIMUM : ISD::FMAXIMUM;
4241 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4242 Op.getOperand(1), Op.getOperand(2));
4243 }
4244 case Intrinsic::arm_neon_vtbl1:
4245 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4246 Op.getOperand(1), Op.getOperand(2));
4247 case Intrinsic::arm_neon_vtbl2:
4248 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4249 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4250 case Intrinsic::arm_mve_pred_i2v:
4251 case Intrinsic::arm_mve_pred_v2i:
4252 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4253 Op.getOperand(1));
4254 case Intrinsic::arm_mve_vreinterpretq:
4255 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4256 Op.getOperand(1));
4257 case Intrinsic::arm_mve_lsll:
4258 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4259 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4260 case Intrinsic::arm_mve_asrl:
4261 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4262 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4263 }
4264}
4265
4266 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4267 const ARMSubtarget *Subtarget) {
4268 SDLoc dl(Op);
4269 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4270 if (SSID == SyncScope::SingleThread)
4271 return Op;
4272
4273 if (!Subtarget->hasDataBarrier()) {
4274 // Some ARMv6 CPUs can support data barriers with an mcr instruction.
4275 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4276 // here.
4277 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4278 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4279 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4280 DAG.getConstant(0, dl, MVT::i32));
4281 }
4282
4283 AtomicOrdering Ord =
4284 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4285 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
4286 if (Subtarget->isMClass()) {
4287 // Only a full system barrier exists in the M-class architectures.
4288 Domain = ARM_MB::SY;
4289 } else if (Subtarget->preferISHSTBarriers() &&
4290 Ord == AtomicOrdering::Release) {
4291 // Swift happens to implement ISHST barriers in a way that's compatible with
4292 // Release semantics but weaker than ISH so we'd be fools not to use
4293 // it. Beware: other processors probably don't!
4294 Domain = ARM_MB::ISHST;
4295 }
4296
4297 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4298 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4299 DAG.getConstant(Domain, dl, MVT::i32));
4300}
4301
4302 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
4303 const ARMSubtarget *Subtarget) {
4304 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4305 if (!(Subtarget->isThumb2() ||
4306 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4307 // Just preserve the chain.
4308 return Op.getOperand(0);
4309
4310 SDLoc dl(Op);
4311 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4312 if (!isRead &&
4313 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4314 // ARMv7 with MP extension has PLDW.
4315 return Op.getOperand(0);
4316
4317 unsigned isData = Op.getConstantOperandVal(4);
4318 if (Subtarget->isThumb()) {
4319 // Invert the bits.
4320 isRead = ~isRead & 1;
4321 isData = ~isData & 1;
4322 }
4323
4324 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4325 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4326 DAG.getConstant(isData, dl, MVT::i32));
4327}
4328
4329 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
4330 MachineFunction &MF = DAG.getMachineFunction();
4331 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4332
4333 // vastart just stores the address of the VarArgsFrameIndex slot into the
4334 // memory location argument.
4335 SDLoc dl(Op);
4336 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4337 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4338 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4339 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4340 MachinePointerInfo(SV));
4341}
4342
4343SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4344 CCValAssign &NextVA,
4345 SDValue &Root,
4346 SelectionDAG &DAG,
4347 const SDLoc &dl) const {
4348 MachineFunction &MF = DAG.getMachineFunction();
4349 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4350
4351 const TargetRegisterClass *RC;
4352 if (AFI->isThumb1OnlyFunction())
4353 RC = &ARM::tGPRRegClass;
4354 else
4355 RC = &ARM::GPRRegClass;
4356
4357 // Transform the arguments stored in physical registers into virtual ones.
4358 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4359 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4360
4361 SDValue ArgValue2;
4362 if (NextVA.isMemLoc()) {
4363 MachineFrameInfo &MFI = MF.getFrameInfo();
4364 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4365
4366 // Create load node to retrieve arguments from the stack.
4367 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4368 ArgValue2 = DAG.getLoad(
4369 MVT::i32, dl, Root, FIN,
4370 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4371 } else {
4372 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4373 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4374 }
4375 if (!Subtarget->isLittle())
4376 std::swap (ArgValue, ArgValue2);
4377 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4378}
4379
4380// The remaining GPRs hold either the beginning of variable-argument
4381// data, or the beginning of an aggregate passed by value (usually
4382// byval). Either way, we allocate stack slots adjacent to the data
4383// provided by our caller, and store the unallocated registers there.
4384// If this is a variadic function, the va_list pointer will begin with
4385// these values; otherwise, this reassembles a (byval) structure that
4386// was split between registers and memory.
4387 // Return: The frame index that the registers were stored into.
4388int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4389 const SDLoc &dl, SDValue &Chain,
4390 const Value *OrigArg,
4391 unsigned InRegsParamRecordIdx,
4392 int ArgOffset, unsigned ArgSize) const {
4393 // Currently, two use-cases are possible:
4394 // Case #1. Non-var-args function, and we meet the first byval parameter.
4395 // Set up the first unallocated register as the first byval register;
4396 // eat all remaining registers
4397 // (these two actions are performed by the HandleByVal method).
4398 // Then, here, we initialize the stack frame with
4399 // "store-reg" instructions.
4400 // Case #2. Var-args function that doesn't contain byval parameters.
4401 // The same: eat all remaining unallocated registers and
4402 // initialize the stack frame.
4403
4404 MachineFunction &MF = DAG.getMachineFunction();
4405 MachineFrameInfo &MFI = MF.getFrameInfo();
4406 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4407 unsigned RBegin, REnd;
4408 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4409 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4410 } else {
4411 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4412 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4413 REnd = ARM::R4;
4414 }
4415
4416 if (REnd != RBegin)
4417 ArgOffset = -4 * (ARM::R4 - RBegin);
4418
4419 auto PtrVT = getPointerTy(DAG.getDataLayout());
4420 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4421 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4422
4423 SmallVector<SDValue, 4> MemOps;
4424 const TargetRegisterClass *RC =
4425 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4426
4427 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4428 Register VReg = MF.addLiveIn(Reg, RC);
4429 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4430 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4431 MachinePointerInfo(OrigArg, 4 * i));
4432 MemOps.push_back(Store);
4433 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4434 }
4435
4436 if (!MemOps.empty())
4437 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4438 return FrameIndex;
4439}
4440
4441 // Set up the stack frame that the va_list pointer will start from.
4442void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4443 const SDLoc &dl, SDValue &Chain,
4444 unsigned ArgOffset,
4445 unsigned TotalArgRegsSaveSize,
4446 bool ForceMutable) const {
4447 MachineFunction &MF = DAG.getMachineFunction();
4448 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4449
4450 // Try to store any remaining integer argument regs
4451 // to their spots on the stack so that they may be loaded by dereferencing
4452 // the result of va_next.
4453 // If there are no regs to be stored, just point the address after the last
4454 // argument passed via the stack.
4455 int FrameIndex = StoreByValRegs(
4456 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4457 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4458 AFI->setVarArgsFrameIndex(FrameIndex);
4459}
4460
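// f16/bf16 values that the calling convention assigns to f32 registers are
// passed in the low 16 bits of the register; the two hooks below implement
// that packing (bitcast to i16, any-extend to i32, bitcast to f32) and the
// corresponding unpacking on the receiving side.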
4461bool ARMTargetLowering::splitValueIntoRegisterParts(
4462 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4463 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4464 EVT ValueVT = Val.getValueType();
4465 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4466 unsigned ValueBits = ValueVT.getSizeInBits();
4467 unsigned PartBits = PartVT.getSizeInBits();
4468 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4469 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4470 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4471 Parts[0] = Val;
4472 return true;
4473 }
4474 return false;
4475}
4476
4477SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4478 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4479 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4480 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4481 unsigned ValueBits = ValueVT.getSizeInBits();
4482 unsigned PartBits = PartVT.getSizeInBits();
4483 SDValue Val = Parts[0];
4484
4485 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4486 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4487 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4488 return Val;
4489 }
4490 return SDValue();
4491}
4492
4493SDValue ARMTargetLowering::LowerFormalArguments(
4494 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4495 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4496 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4497 MachineFunction &MF = DAG.getMachineFunction();
4498 MachineFrameInfo &MFI = MF.getFrameInfo();
4499
4500 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4501
4502 // Assign locations to all of the incoming arguments.
4503 SmallVector<CCValAssign, 16> ArgLocs;
4504 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4505 *DAG.getContext());
4506 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4507
4508 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4509 unsigned CurArgIdx = 0;
4510
4511 // Initially ArgRegsSaveSize is zero.
4512 // Then we increase this value each time we meet byval parameter.
4513 // We also increase this value in case of varargs function.
4514 AFI->setArgRegsSaveSize(0);
4515
4516 // Calculate the amount of stack space that we need to allocate to store
4517 // byval and variadic arguments that are passed in registers.
4518 // We need to know this before we allocate the first byval or variadic
4519 // argument, as they will be allocated a stack slot below the CFA (Canonical
4520 // Frame Address, the stack pointer at entry to the function).
4521 unsigned ArgRegBegin = ARM::R4;
4522 for (const CCValAssign &VA : ArgLocs) {
4523 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4524 break;
4525
4526 unsigned Index = VA.getValNo();
4527 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4528 if (!Flags.isByVal())
4529 continue;
4530
4531 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4532 unsigned RBegin, REnd;
4533 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4534 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4535
4536 CCInfo.nextInRegsParam();
4537 }
4538 CCInfo.rewindByValRegsInfo();
4539
4540 int lastInsIndex = -1;
4541 if (isVarArg && MFI.hasVAStart()) {
4542 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4543 if (RegIdx != std::size(GPRArgRegs))
4544 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4545 }
4546
4547 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4548 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4549 auto PtrVT = getPointerTy(DAG.getDataLayout());
4550
4551 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4552 CCValAssign &VA = ArgLocs[i];
4553 if (Ins[VA.getValNo()].isOrigArg()) {
4554 std::advance(CurOrigArg,
4555 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4556 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4557 }
4558 // Arguments stored in registers.
4559 if (VA.isRegLoc()) {
4560 EVT RegVT = VA.getLocVT();
4561 SDValue ArgValue;
4562
4563 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4564 // f64 and vector types are split up into multiple registers or
4565 // combinations of registers and stack slots.
4566 SDValue ArgValue1 =
4567 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4568 VA = ArgLocs[++i]; // skip ahead to next loc
4569 SDValue ArgValue2;
4570 if (VA.isMemLoc()) {
4571 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4572 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4573 ArgValue2 = DAG.getLoad(
4574 MVT::f64, dl, Chain, FIN,
4575 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4576 } else {
4577 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4578 }
4579 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4580 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4581 ArgValue1, DAG.getIntPtrConstant(0, dl));
4582 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4583 ArgValue2, DAG.getIntPtrConstant(1, dl));
4584 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4585 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4586 } else {
4587 const TargetRegisterClass *RC;
4588
4589 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4590 RC = &ARM::HPRRegClass;
4591 else if (RegVT == MVT::f32)
4592 RC = &ARM::SPRRegClass;
4593 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4594 RegVT == MVT::v4bf16)
4595 RC = &ARM::DPRRegClass;
4596 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4597 RegVT == MVT::v8bf16)
4598 RC = &ARM::QPRRegClass;
4599 else if (RegVT == MVT::i32)
4600 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4601 : &ARM::GPRRegClass;
4602 else
4603 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4604
4605 // Transform the arguments in physical registers into virtual ones.
4606 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4607 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4608
4609 // If this value is passed in r0 and has the returned attribute (e.g.
4610 // C++ 'structors), record this fact for later use.
4611 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4612 AFI->setPreservesR0();
4613 }
4614 }
4615
4616 // If this is an 8 or 16-bit value, it is really passed promoted
4617 // to 32 bits. Insert an assert[sz]ext to capture this, then
4618 // truncate to the right size.
4619 switch (VA.getLocInfo()) {
4620 default: llvm_unreachable("Unknown loc info!");
4621 case CCValAssign::Full: break;
4622 case CCValAssign::BCvt:
4623 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4624 break;
4625 }
4626
4627 // f16 arguments have their size extended to 4 bytes and passed as if they
4628 // had been copied to the LSBs of a 32-bit register.
4629 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4630 if (VA.needsCustom() &&
4631 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4632 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4633
4634 // On CMSE Entry Functions, formal integer arguments whose bitwidth is
4635 // less than 32 bits must be sign- or zero-extended in the callee for
4636 // security reasons. Although the ABI mandates an extension done by the
4637 // caller, the latter cannot be trusted to follow the rules of the ABI.
4638 const ISD::InputArg &Arg = Ins[VA.getValNo()];
4639 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4640 RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
4641 ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);
4642
4643 InVals.push_back(ArgValue);
4644 } else { // VA.isRegLoc()
4645 // Only arguments passed on the stack should make it here.
4646 assert(VA.isMemLoc());
4647 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4648
4649 int index = VA.getValNo();
4650
4651 // Some Ins[] entries become multiple ArgLoc[] entries.
4652 // Process them only once.
4653 if (index != lastInsIndex)
4654 {
4655 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4656 // FIXME: For now, all byval parameter objects are marked mutable.
4657 // This can be changed with more analysis.
4658 // In case of tail call optimization mark all arguments mutable.
4659 // Since they could be overwritten by lowering of arguments in case of
4660 // a tail call.
4661 if (Flags.isByVal()) {
4662 assert(Ins[index].isOrigArg() &&
4663 "Byval arguments cannot be implicit");
4664 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4665
4666 int FrameIndex = StoreByValRegs(
4667 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4668 VA.getLocMemOffset(), Flags.getByValSize());
4669 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4670 CCInfo.nextInRegsParam();
4671 } else {
4672 unsigned FIOffset = VA.getLocMemOffset();
4673 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4674 FIOffset, true);
4675
4676 // Create load nodes to retrieve arguments from the stack.
4677 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4678 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4679 MachinePointerInfo::getFixedStack(
4680 DAG.getMachineFunction(), FI)));
4681 }
4682 lastInsIndex = index;
4683 }
4684 }
4685 }
4686
4687 // varargs
4688 if (isVarArg && MFI.hasVAStart()) {
4689 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4690 TotalArgRegsSaveSize);
4691 if (AFI->isCmseNSEntryFunction()) {
4692 DiagnosticInfoUnsupported Diag(
4693 DAG.getMachineFunction().getFunction(),
4694 "secure entry function must not be variadic", dl.getDebugLoc());
4695 DAG.getContext()->diagnose(Diag);
4696 }
4697 }
4698
4699 unsigned StackArgSize = CCInfo.getStackSize();
4700 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4701 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4702 // The only way to guarantee a tail call is if the callee restores its
4703 // argument area, but it must also keep the stack aligned when doing so.
4704 const DataLayout &DL = DAG.getDataLayout();
4705 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4706
4707 AFI->setArgumentStackToRestore(StackArgSize);
4708 }
4709 AFI->setArgumentStackSize(StackArgSize);
4710
4711 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4712 DiagnosticInfoUnsupported Diag(
4713 DAG.getMachineFunction().getFunction(),
4714 "secure entry function requires arguments on stack", dl.getDebugLoc());
4715 DAG.getContext()->diagnose(Diag);
4716 }
4717
4718 return Chain;
4719}
4720
4721/// isFloatingPointZero - Return true if this is +0.0.
4722 static bool isFloatingPointZero(SDValue Op) {
4723 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4724 return CFP->getValueAPF().isPosZero();
4725 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4726 // Maybe this has already been legalized into the constant pool?
4727 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4728 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4729 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4730 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4731 return CFP->getValueAPF().isPosZero();
4732 }
4733 } else if (Op->getOpcode() == ISD::BITCAST &&
4734 Op->getValueType(0) == MVT::f64) {
4735 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4736 // created by LowerConstantFP().
4737 SDValue BitcastOp = Op->getOperand(0);
4738 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4739 isNullConstant(BitcastOp->getOperand(0)))
4740 return true;
4741 }
4742 return false;
4743}
4744
4745/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4746/// the given operands.
4747SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4748 SDValue &ARMcc, SelectionDAG &DAG,
4749 const SDLoc &dl) const {
4750 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4751 unsigned C = RHSC->getZExtValue();
4752 if (!isLegalICmpImmediate((int32_t)C)) {
4753 // Constant does not fit, try adjusting it by one.
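// e.g. on ARM, "x s< 257" cannot use 257 as a cmp immediate, but 256 is
// encodable, so the comparison is rewritten below as "x s<= 256".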
4754 switch (CC) {
4755 default: break;
4756 case ISD::SETLT:
4757 case ISD::SETGE:
4758 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4759 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4760 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4761 }
4762 break;
4763 case ISD::SETULT:
4764 case ISD::SETUGE:
4765 if (C != 0 && isLegalICmpImmediate(C-1)) {
4766 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4767 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4768 }
4769 break;
4770 case ISD::SETLE:
4771 case ISD::SETGT:
4772 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4773 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4774 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4775 }
4776 break;
4777 case ISD::SETULE:
4778 case ISD::SETUGT:
4779 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4780 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4781 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4782 }
4783 break;
4784 }
4785 }
4786 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4787 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4788 // In ARM and Thumb-2, the compare instructions can shift their second
4789 // operand.
4790 CC = ISD::getSetCCSwappedOperands(CC);
4791 std::swap(LHS, RHS);
4792 }
4793
4794 // Thumb1 has very limited immediate modes, so turning an "and" into a
4795 // shift can save multiple instructions.
4796 //
4797 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4798 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4799 // own. If it's the operand to an unsigned comparison with an immediate,
4800 // we can eliminate one of the shifts: we transform
4801 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4802 //
4803 // We avoid transforming cases which aren't profitable due to encoding
4804 // details:
4805 //
4806 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4807 // would not; in that case, we're essentially trading one immediate load for
4808 // another.
4809 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4810 // 3. C2 is zero; we have other code for this special case.
4811 //
4812 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4813 // instruction, since the AND is always one instruction anyway, but we could
4814 // use narrow instructions in some cases.
4815 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4816 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4817 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4818 !isSignedIntSetCC(CC)) {
4819 unsigned Mask = LHS.getConstantOperandVal(1);
4820 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4821 uint64_t RHSV = RHSC->getZExtValue();
4822 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4823 unsigned ShiftBits = llvm::countl_zero(Mask);
4824 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4825 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4826 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4827 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4828 }
4829 }
4830 }
4831
4832 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4833 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4834 // way a cmp would.
4835 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4836 // some tweaks to the heuristics for the previous and->shift transform.
4837 // FIXME: Optimize cases where the LHS isn't a shift.
4838 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4839 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4840 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4841 LHS.getConstantOperandVal(1) < 31) {
4842 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4843 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4844 DAG.getVTList(MVT::i32, MVT::i32),
4845 LHS.getOperand(0),
4846 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4847 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4848 Shift.getValue(1), SDValue());
4849 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4850 return Chain.getValue(1);
4851 }
4852
4853 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4854
4855 // If the RHS is a constant zero then the V (overflow) flag will never be
4856 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4857 // simpler for other passes (like the peephole optimiser) to deal with.
4858 if (isNullConstant(RHS)) {
4859 switch (CondCode) {
4860 default: break;
4861 case ARMCC::GE:
4862 CondCode = ARMCC::PL;
4863 break;
4864 case ARMCC::LT:
4865 CondCode = ARMCC::MI;
4866 break;
4867 }
4868 }
4869
4870 ARMISD::NodeType CompareType;
4871 switch (CondCode) {
4872 default:
4873 CompareType = ARMISD::CMP;
4874 break;
4875 case ARMCC::EQ:
4876 case ARMCC::NE:
4877 // Uses only Z Flag
4878 CompareType = ARMISD::CMPZ;
4879 break;
4880 }
4881 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4882 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4883}
4884
4885 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4886SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4887 SelectionDAG &DAG, const SDLoc &dl,
4888 bool Signaling) const {
4889 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4890 SDValue Cmp;
4891 if (!isFloatingPointZero(RHS))
4892 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4893 dl, MVT::Glue, LHS, RHS);
4894 else
4895 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4896 dl, MVT::Glue, LHS);
4897 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4898}
4899
4900/// duplicateCmp - Glue values can have only one use, so this function
4901/// duplicates a comparison node.
4902SDValue
4903ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4904 unsigned Opc = Cmp.getOpcode();
4905 SDLoc DL(Cmp);
4906 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4907 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4908
4909 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4910 Cmp = Cmp.getOperand(0);
4911 Opc = Cmp.getOpcode();
4912 if (Opc == ARMISD::CMPFP)
4913 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4914 else {
4915 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4916 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4917 }
4918 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4919}
4920
4921// This function returns three things: the arithmetic computation itself
4922// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4923// comparison and the condition code define the case in which the arithmetic
4924// computation *does not* overflow.
4925std::pair<SDValue, SDValue>
4926ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4927 SDValue &ARMcc) const {
4928 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4929
4930 SDValue Value, OverflowCmp;
4931 SDValue LHS = Op.getOperand(0);
4932 SDValue RHS = Op.getOperand(1);
4933 SDLoc dl(Op);
4934
4935 // FIXME: We are currently always generating CMPs because we don't support
4936 // generating CMN through the backend. This is not as good as the natural
4937 // CMP case because it causes a register dependency and cannot be folded
4938 // later.
4939
4940 switch (Op.getOpcode()) {
4941 default:
4942 llvm_unreachable("Unknown overflow instruction!");
4943 case ISD::SADDO:
4944 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4945 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4946 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4947 break;
4948 case ISD::UADDO:
4949 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4950 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4951 // We do not use it in the USUBO case as Value may not be used.
4952 Value = DAG.getNode(ARMISD::ADDC, dl,
4953 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4954 .getValue(0);
4955 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4956 break;
4957 case ISD::SSUBO:
4958 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4959 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4960 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4961 break;
4962 case ISD::USUBO:
4963 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4964 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4965 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4966 break;
4967 case ISD::UMULO:
4968 // We generate a UMUL_LOHI and then check if the high word is 0.
4969 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4970 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4971 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4972 LHS, RHS);
4973 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4974 DAG.getConstant(0, dl, MVT::i32));
4975 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4976 break;
4977 case ISD::SMULO:
4978 // We generate a SMUL_LOHI and then check if all the bits of the high word
4979 // are the same as the sign bit of the low word.
4980 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4981 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4982 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4983 LHS, RHS);
4984 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4985 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4986 Value.getValue(0),
4987 DAG.getConstant(31, dl, MVT::i32)));
4988 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4989 break;
4990 } // switch (...)
4991
4992 return std::make_pair(Value, OverflowCmp);
4993}
4994
4995SDValue
4996ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4997 // Let legalize expand this if it isn't a legal type yet.
4998 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4999 return SDValue();
5000
5001 SDValue Value, OverflowCmp;
5002 SDValue ARMcc;
5003 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
5004 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5005 SDLoc dl(Op);
5006 // We use 0 and 1 as false and true values.
5007 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
5008 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
5009 EVT VT = Op.getValueType();
5010
5011 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
5012 ARMcc, CCR, OverflowCmp);
5013
5014 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
5015 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5016}
5017
5018 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
5019 SelectionDAG &DAG) {
5020 SDLoc DL(BoolCarry);
5021 EVT CarryVT = BoolCarry.getValueType();
5022
5023 // This converts the boolean value carry into the carry flag by doing
5024 // ARMISD::SUBC Carry, 1
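// (Computing Carry - 1 sets the ARM carry flag exactly when BoolCarry is 1:
// 1 - 1 does not borrow, so C = 1, while 0 - 1 borrows, so C = 0, matching
// ARM's carry-means-no-borrow convention for subtraction.)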
5025 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
5026 DAG.getVTList(CarryVT, MVT::i32),
5027 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5028 return Carry.getValue(1);
5029}
5030
5031 static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
5032 SelectionDAG &DAG) {
5033 SDLoc DL(Flags);
5034
5035 // Now convert the carry flag into a boolean carry. We do this
5036 // using ARMISD:ADDE 0, 0, Carry
5037 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5038 DAG.getConstant(0, DL, MVT::i32),
5039 DAG.getConstant(0, DL, MVT::i32), Flags);
5040}
5041
5042SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5043 SelectionDAG &DAG) const {
5044 // Let legalize expand this if it isn't a legal type yet.
5045 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5046 return SDValue();
5047
5048 SDValue LHS = Op.getOperand(0);
5049 SDValue RHS = Op.getOperand(1);
5050 SDLoc dl(Op);
5051
5052 EVT VT = Op.getValueType();
5053 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5054 SDValue Value;
5055 SDValue Overflow;
5056 switch (Op.getOpcode()) {
5057 default:
5058 llvm_unreachable("Unknown overflow instruction!");
5059 case ISD::UADDO:
5060 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5061 // Convert the carry flag into a boolean value.
5062 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5063 break;
5064 case ISD::USUBO: {
5065 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5066 // Convert the carry flag into a boolean value.
5067 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5068 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
5069 // value. So compute 1 - C.
5070 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5071 DAG.getConstant(1, dl, MVT::i32), Overflow);
5072 break;
5073 }
5074 }
5075
5076 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5077}
5078
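// Lower i8/i16 saturating add/sub to the Armv6 DSP byte/halfword saturating
// nodes (QADD8b/UQADD8b/QSUB8b/... and their 16-bit forms): the operands are
// widened to i32, the saturating operation is done there, and the result is
// truncated back to the original type.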
5079 static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
5080 const ARMSubtarget *Subtarget) {
5081 EVT VT = Op.getValueType();
5082 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5083 return SDValue();
5084 if (!VT.isSimple())
5085 return SDValue();
5086
5087 unsigned NewOpcode;
5088 switch (VT.getSimpleVT().SimpleTy) {
5089 default:
5090 return SDValue();
5091 case MVT::i8:
5092 switch (Op->getOpcode()) {
5093 case ISD::UADDSAT:
5094 NewOpcode = ARMISD::UQADD8b;
5095 break;
5096 case ISD::SADDSAT:
5097 NewOpcode = ARMISD::QADD8b;
5098 break;
5099 case ISD::USUBSAT:
5100 NewOpcode = ARMISD::UQSUB8b;
5101 break;
5102 case ISD::SSUBSAT:
5103 NewOpcode = ARMISD::QSUB8b;
5104 break;
5105 }
5106 break;
5107 case MVT::i16:
5108 switch (Op->getOpcode()) {
5109 case ISD::UADDSAT:
5110 NewOpcode = ARMISD::UQADD16b;
5111 break;
5112 case ISD::SADDSAT:
5113 NewOpcode = ARMISD::QADD16b;
5114 break;
5115 case ISD::USUBSAT:
5116 NewOpcode = ARMISD::UQSUB16b;
5117 break;
5118 case ISD::SSUBSAT:
5119 NewOpcode = ARMISD::QSUB16b;
5120 break;
5121 }
5122 break;
5123 }
5124
5125 SDLoc dl(Op);
5126 SDValue Add =
5127 DAG.getNode(NewOpcode, dl, MVT::i32,
5128 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5129 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5130 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5131}
5132
5133SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5134 SDValue Cond = Op.getOperand(0);
5135 SDValue SelectTrue = Op.getOperand(1);
5136 SDValue SelectFalse = Op.getOperand(2);
5137 SDLoc dl(Op);
5138 unsigned Opc = Cond.getOpcode();
5139
5140 if (Cond.getResNo() == 1 &&
5141 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5142 Opc == ISD::USUBO)) {
5143 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5144 return SDValue();
5145
5146 SDValue Value, OverflowCmp;
5147 SDValue ARMcc;
5148 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5149 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5150 EVT VT = Op.getValueType();
5151
5152 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5153 OverflowCmp, DAG);
5154 }
5155
5156 // Convert:
5157 //
5158 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5159 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5160 //
5161 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5162 const ConstantSDNode *CMOVTrue =
5163 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5164 const ConstantSDNode *CMOVFalse =
5165 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5166
5167 if (CMOVTrue && CMOVFalse) {
5168 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5169 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5170
5171 SDValue True;
5172 SDValue False;
5173 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5174 True = SelectTrue;
5175 False = SelectFalse;
5176 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5177 True = SelectFalse;
5178 False = SelectTrue;
5179 }
5180
5181 if (True.getNode() && False.getNode()) {
5182 EVT VT = Op.getValueType();
5183 SDValue ARMcc = Cond.getOperand(2);
5184 SDValue CCR = Cond.getOperand(3);
5185 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5186 assert(True.getValueType() == VT);
5187 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5188 }
5189 }
5190 }
5191
5192 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5193 // undefined bits before doing a full-word comparison with zero.
5194 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5195 DAG.getConstant(1, dl, Cond.getValueType()));
5196
5197 return DAG.getSelectCC(dl, Cond,
5198 DAG.getConstant(0, dl, Cond.getValueType()),
5199 SelectTrue, SelectFalse, ISD::SETNE);
5200}
5201
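// Map an FP setcc condition onto the GE/GT/EQ/VS codes that VSEL supports,
// recording whether the compare operands and/or the VSEL operands must be
// swapped to preserve the semantics. For example, (x u< y ? t : f) ends up as
// a GE compare of (x, y) with the VSEL operands swapped, i.e. (x >= y ? f : t).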
5202 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
5203 bool &swpCmpOps, bool &swpVselOps) {
5204 // Start by selecting the GE condition code for opcodes that return true for
5205 // 'equality'
5206 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5207 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5208 CondCode = ARMCC::GE;
5209
5210 // and GT for opcodes that return false for 'equality'.
5211 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5212 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5213 CondCode = ARMCC::GT;
5214
5215 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5216 // to swap the compare operands.
5217 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5218 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5219 swpCmpOps = true;
5220
5221 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5222 // If we have an unordered opcode, we need to swap the operands to the VSEL
5223 // instruction (effectively negating the condition).
5224 //
5225 // This also has the effect of swapping which one of 'less' or 'greater'
5226 // returns true, so we also swap the compare operands. It also switches
5227 // whether we return true for 'equality', so we compensate by picking the
5228 // opposite condition code to our original choice.
5229 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5230 CC == ISD::SETUGT) {
5231 swpCmpOps = !swpCmpOps;
5232 swpVselOps = !swpVselOps;
5233 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5234 }
5235
5236 // 'ordered' is 'anything but unordered', so use the VS condition code and
5237 // swap the VSEL operands.
5238 if (CC == ISD::SETO) {
5239 CondCode = ARMCC::VS;
5240 swpVselOps = true;
5241 }
5242
5243 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5244 // code and swap the VSEL operands. Also do this if we don't care about the
5245 // unordered case.
5246 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5247 CondCode = ARMCC::EQ;
5248 swpVselOps = true;
5249 }
5250}
5251
5252SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5253 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5254 SDValue Cmp, SelectionDAG &DAG) const {
5255 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5256 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5257 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5258 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5259 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5260
5261 SDValue TrueLow = TrueVal.getValue(0);
5262 SDValue TrueHigh = TrueVal.getValue(1);
5263 SDValue FalseLow = FalseVal.getValue(0);
5264 SDValue FalseHigh = FalseVal.getValue(1);
5265
5266 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5267 ARMcc, CCR, Cmp);
5268 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5269 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5270
5271 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5272 } else {
5273 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5274 Cmp);
5275 }
5276}
5277
5278 static bool isGTorGE(ISD::CondCode CC) {
5279 return CC == ISD::SETGT || CC == ISD::SETGE;
5280}
5281
5282 static bool isLTorLE(ISD::CondCode CC) {
5283 return CC == ISD::SETLT || CC == ISD::SETLE;
5284}
5285
5286// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5287// All of these conditions (and their <= and >= counterparts) will do:
5288// x < k ? k : x
5289// x > k ? x : k
5290// k < x ? x : k
5291// k > x ? k : x
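// e.g. both "x < 0 ? 0 : x" and "x > 0 ? x : 0" lower-saturate x at k == 0.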
5292static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5293 const SDValue TrueVal, const SDValue FalseVal,
5294 const ISD::CondCode CC, const SDValue K) {
5295 return (isGTorGE(CC) &&
5296 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5297 (isLTorLE(CC) &&
5298 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5299}
5300
5301// Check if two chained conditionals could be converted into SSAT or USAT.
5302//
5303// SSAT can replace a set of two conditional selectors that bound a number to an
5304// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5305//
5306// x < -k ? -k : (x > k ? k : x)
5307// x < -k ? -k : (x < k ? x : k)
5308// x > -k ? (x > k ? k : x) : -k
5309// x < k ? (x < -k ? -k : x) : k
5310// etc.
5311//
5312// LLVM canonicalizes these to either a min(max()) or a max(min())
5313// pattern. This function tries to match one of these and will return a SSAT
5314// node if successful.
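// For instance, clamping an i32 value to [-128, 127] with such a pair of
// selects is matched here and lowered to a single signed saturate to 8 bits
// (ssat #8); the unsigned clamp to [0, 255] similarly becomes usat #8.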
5315//
5316 // USAT works similarly to SSAT, but bounds to the interval [0, k] where k + 1
5317// is a power of 2.
5318 static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5319 EVT VT = Op.getValueType();
5320 SDValue V1 = Op.getOperand(0);
5321 SDValue K1 = Op.getOperand(1);
5322 SDValue TrueVal1 = Op.getOperand(2);
5323 SDValue FalseVal1 = Op.getOperand(3);
5324 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5325
5326 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5327 if (Op2.getOpcode() != ISD::SELECT_CC)
5328 return SDValue();
5329
5330 SDValue V2 = Op2.getOperand(0);
5331 SDValue K2 = Op2.getOperand(1);
5332 SDValue TrueVal2 = Op2.getOperand(2);
5333 SDValue FalseVal2 = Op2.getOperand(3);
5334 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5335
5336 SDValue V1Tmp = V1;
5337 SDValue V2Tmp = V2;
5338
5339 // Check that the registers and the constants match a max(min()) or min(max())
5340 // pattern
5341 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5342 K2 != FalseVal2 ||
5343 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5344 return SDValue();
5345
5346 // Check that the constant in the lower-bound check is
5347 // the opposite of the constant in the upper-bound check
5348 // in 1's complement.
5349 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5350 return SDValue();
5351
5352 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5353 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5354 int64_t PosVal = std::max(Val1, Val2);
5355 int64_t NegVal = std::min(Val1, Val2);
5356
5357 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5358 !isPowerOf2_64(PosVal + 1))
5359 return SDValue();
5360
5361 // Handle the difference between USAT (unsigned) and SSAT (signed)
5362 // saturation
5363 // At this point, PosVal is guaranteed to be positive
5364 uint64_t K = PosVal;
5365 SDLoc dl(Op);
5366 if (Val1 == ~Val2)
5367 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5368 DAG.getConstant(llvm::countr_one(K), dl, VT));
5369 if (NegVal == 0)
5370 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5371 DAG.getConstant(llvm::countr_one(K), dl, VT));
5372
5373 return SDValue();
5374}
5375
5376// Check if a condition of the type x < k ? k : x can be converted into a
5377// bit operation instead of conditional moves.
5378// Currently this is allowed given:
5379// - The conditions and values match up
5380// - k is 0 or -1 (all ones)
5381 // This function will not check the last condition; that's up to the caller.
5382 // It returns true if the transformation can be made, and in that case
5383 // returns x in V and k in SatK.
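//
// For example, in LowerSELECT_CC below, k == 0 gives
//   x < 0 ? 0 : x   ==>  x & ~(x >> 31)
// and k == -1 gives
//   x < -1 ? -1 : x ==>  x | (x >> 31)
// (an arithmetic shift right by 31 produces the sign mask).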
5384 static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5385 SDValue &SatK)
5386{
5387 SDValue LHS = Op.getOperand(0);
5388 SDValue RHS = Op.getOperand(1);
5389 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5390 SDValue TrueVal = Op.getOperand(2);
5391 SDValue FalseVal = Op.getOperand(3);
5392
5393 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5394 ? &RHS
5395 : nullptr;
5396
5397 // No constant operand in the comparison, early out
5398 if (!K)
5399 return false;
5400
5401 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5402 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5403 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5404
5405 // If the constant in the select does not match the constant in the compare,
5406 // or the variable does not match, early out
5407 if (*K != KTmp || V != VTmp)
5408 return false;
5409
5410 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5411 SatK = *K;
5412 return true;
5413 }
5414
5415 return false;
5416}
5417
5418bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5419 if (VT == MVT::f32)
5420 return !Subtarget->hasVFP2Base();
5421 if (VT == MVT::f64)
5422 return !Subtarget->hasFP64();
5423 if (VT == MVT::f16)
5424 return !Subtarget->hasFullFP16();
5425 return false;
5426}
5427
5428SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5429 EVT VT = Op.getValueType();
5430 SDLoc dl(Op);
5431
5432 // Try to convert two saturating conditional selects into a single SSAT
5433 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5434 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5435 return SatValue;
5436
5437 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5438 // into more efficient bit operations, which is possible when k is 0 or -1.
5439 // On ARM and Thumb-2, which have a flexible second operand, this results in a
5440 // single instruction. On Thumb-1 the shift and the bit operation will be two
5441 // instructions.
5442 // Only allow this transformation on full-width (32-bit) operations
5443 SDValue LowerSatConstant;
5444 SDValue SatValue;
5445 if (VT == MVT::i32 &&
5446 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5447 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5448 DAG.getConstant(31, dl, VT));
5449 if (isNullConstant(LowerSatConstant)) {
5450 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5451 DAG.getAllOnesConstant(dl, VT));
5452 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5453 } else if (isAllOnesConstant(LowerSatConstant))
5454 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5455 }
5456
5457 SDValue LHS = Op.getOperand(0);
5458 SDValue RHS = Op.getOperand(1);
5459 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5460 SDValue TrueVal = Op.getOperand(2);
5461 SDValue FalseVal = Op.getOperand(3);
5462 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5463 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5464
5465 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5466 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5467 unsigned TVal = CTVal->getZExtValue();
5468 unsigned FVal = CFVal->getZExtValue();
5469 unsigned Opcode = 0;
5470
5471 if (TVal == ~FVal) {
5472 Opcode = ARMISD::CSINV;
5473 } else if (TVal == ~FVal + 1) {
5474 Opcode = ARMISD::CSNEG;
5475 } else if (TVal + 1 == FVal) {
5476 Opcode = ARMISD::CSINC;
5477 } else if (TVal == FVal + 1) {
5478 Opcode = ARMISD::CSINC;
5479 std::swap(TrueVal, FalseVal);
5480 std::swap(TVal, FVal);
5481 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5482 }
5483
5484 if (Opcode) {
5485 // If one of the constants is cheaper than another, materialise the
5486 // cheaper one and let the csel generate the other.
5487 if (Opcode != ARMISD::CSINC &&
5488 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5489 std::swap(TrueVal, FalseVal);
5490 std::swap(TVal, FVal);
5491 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5492 }
5493
5494 // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5495 // condition to get there. CSINC is not invertible like the other two
5496 // (~(~a) == a, -(-a) == a, but (a+1)+1 != a).
5497 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5498 std::swap(TrueVal, FalseVal);
5499 std::swap(TVal, FVal);
5500 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5501 }
5502
5503 // Drops F's value because we can get it by inverting/negating TVal.
5504 FalseVal = TrueVal;
5505
5506 SDValue ARMcc;
5507 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5508 EVT VT = TrueVal.getValueType();
5509 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5510 }
5511 }
5512
5513 if (isUnsupportedFloatingType(LHS.getValueType())) {
5514 DAG.getTargetLoweringInfo().softenSetCCOperands(
5515 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5516
5517 // If softenSetCCOperands only returned one value, we should compare it to
5518 // zero.
5519 if (!RHS.getNode()) {
5520 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5521 CC = ISD::SETNE;
5522 }
5523 }
5524
5525 if (LHS.getValueType() == MVT::i32) {
5526 // Try to generate VSEL on ARMv8.
5527 // The VSEL instruction can't use all the usual ARM condition
5528 // codes: it only has two bits to select the condition code, so it's
5529 // constrained to use only GE, GT, VS and EQ.
5530 //
5531 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5532 // swap the operands of the previous compare instruction (effectively
5533 // inverting the compare condition, swapping 'less' and 'greater') and
5534 // sometimes need to swap the operands to the VSEL (which inverts the
5535 // condition in the sense of firing whenever the previous condition didn't)
5536 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5537 TrueVal.getValueType() == MVT::f32 ||
5538 TrueVal.getValueType() == MVT::f64)) {
5539 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5540 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5541 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5542 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5543 std::swap(TrueVal, FalseVal);
5544 }
5545 }
5546
5547 SDValue ARMcc;
5548 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5549 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5550 // Choose GE over PL, which vsel does not support.
5551 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5552 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5553 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5554 }
5555
5556 ARMCC::CondCodes CondCode, CondCode2;
5557 FPCCToARMCC(CC, CondCode, CondCode2);
5558
5559 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5560 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5561 // must use VSEL (limited condition codes), due to not having conditional f16
5562 // moves.
5563 if (Subtarget->hasFPARMv8Base() &&
5564 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5565 (TrueVal.getValueType() == MVT::f16 ||
5566 TrueVal.getValueType() == MVT::f32 ||
5567 TrueVal.getValueType() == MVT::f64)) {
5568 bool swpCmpOps = false;
5569 bool swpVselOps = false;
5570 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5571
5572 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5573 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5574 if (swpCmpOps)
5575 std::swap(LHS, RHS);
5576 if (swpVselOps)
5577 std::swap(TrueVal, FalseVal);
5578 }
5579 }
5580
5581 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5582 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5583 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5584 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5585 if (CondCode2 != ARMCC::AL) {
5586 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5587 // FIXME: Needs another CMP because flag can have but one use.
5588 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5589 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5590 }
5591 return Result;
5592}
5593
5594/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5595/// to morph to an integer compare sequence.
5596static bool canChangeToInt(SDValue Op, bool &SeenZero,
5597 const ARMSubtarget *Subtarget) {
5598 SDNode *N = Op.getNode();
5599 if (!N->hasOneUse())
5600 // Otherwise it requires moving the value from fp to integer registers.
5601 return false;
5602 if (!N->getNumValues())
5603 return false;
5604 EVT VT = Op.getValueType();
5605 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5606 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5607 // vmrs are very slow, e.g. cortex-a8.
5608 return false;
5609
5610 if (isFloatingPointZero(Op)) {
5611 SeenZero = true;
5612 return true;
5613 }
5614 return ISD::isNormalLoad(N);
5615}
5616
5617 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5618 if (isFloatingPointZero(Op))
5619 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5620
5621 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5622 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5623 Ld->getPointerInfo(), Ld->getAlign(),
5624 Ld->getMemOperand()->getFlags());
5625
5626 llvm_unreachable("Unknown VFP cmp argument!");
5627}
5628
5629 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5630 SDValue &RetVal1, SDValue &RetVal2) {
5631 SDLoc dl(Op);
5632
5633 if (isFloatingPointZero(Op)) {
5634 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5635 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5636 return;
5637 }
5638
5639 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5640 SDValue Ptr = Ld->getBasePtr();
5641 RetVal1 =
5642 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5643 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5644
5645 EVT PtrType = Ptr.getValueType();
5646 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5647 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5648 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5649 Ld->getPointerInfo().getWithOffset(4),
5650 commonAlignment(Ld->getAlign(), 4),
5651 Ld->getMemOperand()->getFlags());
5652 return;
5653 }
5654
5655 llvm_unreachable("Unknown VFP cmp argument!");
5656}
5657
5658/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5659/// f32 and even f64 comparisons to integer ones.
5660SDValue
5661ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5662 SDValue Chain = Op.getOperand(0);
5663 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5664 SDValue LHS = Op.getOperand(2);
5665 SDValue RHS = Op.getOperand(3);
5666 SDValue Dest = Op.getOperand(4);
5667 SDLoc dl(Op);
5668
5669 bool LHSSeenZero = false;
5670 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5671 bool RHSSeenZero = false;
5672 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5673 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5674 // If unsafe fp math optimization is enabled and there are no other uses of
5675 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5676 // to an integer comparison.
5677 if (CC == ISD::SETOEQ)
5678 CC = ISD::SETEQ;
5679 else if (CC == ISD::SETUNE)
5680 CC = ISD::SETNE;
5681
5682 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5683 SDValue ARMcc;
5684 if (LHS.getValueType() == MVT::f32) {
5685 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5686 bitcastf32Toi32(LHS, DAG), Mask);
5687 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5688 bitcastf32Toi32(RHS, DAG), Mask);
5689 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5690 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5691 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5692 Chain, Dest, ARMcc, CCR, Cmp);
5693 }
5694
5695 SDValue LHS1, LHS2;
5696 SDValue RHS1, RHS2;
5697 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5698 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5699 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5700 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5701 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5702 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5703 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5704 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5705 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5706 }
5707
5708 return SDValue();
5709}
5710
5711SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5712 SDValue Chain = Op.getOperand(0);
5713 SDValue Cond = Op.getOperand(1);
5714 SDValue Dest = Op.getOperand(2);
5715 SDLoc dl(Op);
5716
5717 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5718 // instruction.
5719 unsigned Opc = Cond.getOpcode();
5720 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5721 !Subtarget->isThumb1Only();
5722 if (Cond.getResNo() == 1 &&
5723 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5724 Opc == ISD::USUBO || OptimizeMul)) {
5725 // Only lower legal XALUO ops.
5726 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5727 return SDValue();
5728
5729 // The actual operation with overflow check.
5730 SDValue Value, OverflowCmp;
5731 SDValue ARMcc;
5732 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5733
5734 // Reverse the condition code.
5735 ARMCC::CondCodes CondCode =
5736 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5737 CondCode = ARMCC::getOppositeCondition(CondCode);
5738 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5739 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5740
5741 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5742 OverflowCmp);
5743 }
5744
5745 return SDValue();
5746}
5747
5748SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5749 SDValue Chain = Op.getOperand(0);
5750 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5751 SDValue LHS = Op.getOperand(2);
5752 SDValue RHS = Op.getOperand(3);
5753 SDValue Dest = Op.getOperand(4);
5754 SDLoc dl(Op);
5755
5756 if (isUnsupportedFloatingType(LHS.getValueType())) {
5757 DAG.getTargetLoweringInfo().softenSetCCOperands(
5758 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5759
5760 // If softenSetCCOperands only returned one value, we should compare it to
5761 // zero.
5762 if (!RHS.getNode()) {
5763 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5764 CC = ISD::SETNE;
5765 }
5766 }
5767
5768 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5769 // instruction.
5770 unsigned Opc = LHS.getOpcode();
5771 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5772 !Subtarget->isThumb1Only();
5773 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5774 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5775 Opc == ISD::USUBO || OptimizeMul) &&
5776 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5777 // Only lower legal XALUO ops.
5778 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5779 return SDValue();
5780
5781 // The actual operation with overflow check.
5782 SDValue Value, OverflowCmp;
5783 SDValue ARMcc;
5784 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5785
5786 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5787 // Reverse the condition code.
5788 ARMCC::CondCodes CondCode =
5789 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5790 CondCode = ARMCC::getOppositeCondition(CondCode);
5791 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5792 }
5793 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5794
5795 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5796 OverflowCmp);
5797 }
5798
5799 if (LHS.getValueType() == MVT::i32) {
5800 SDValue ARMcc;
5801 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5802 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5803 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5804 Chain, Dest, ARMcc, CCR, Cmp);
5805 }
5806
5807 if (getTargetMachine().Options.UnsafeFPMath &&
5808 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5809 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5810 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5811 return Result;
5812 }
5813
5814 ARMCC::CondCodes CondCode, CondCode2;
5815 FPCCToARMCC(CC, CondCode, CondCode2);
5816
5817 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5818 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5819 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5820 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5821 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5822 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5823 if (CondCode2 != ARMCC::AL) {
5824 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5825 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5826 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5827 }
5828 return Res;
5829}
5830
5831SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5832 SDValue Chain = Op.getOperand(0);
5833 SDValue Table = Op.getOperand(1);
5834 SDValue Index = Op.getOperand(2);
5835 SDLoc dl(Op);
5836
5837 EVT PTy = getPointerTy(DAG.getDataLayout());
5838 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5839 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5840 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5841 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5842 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5843 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5844 // Thumb2 and ARMv8-M use a two-level jump. That is, the first jump goes into
5845 // the jump table, which does another jump to the destination. This also makes
5846 // it easier to translate to TBB / TBH later (Thumb2 only).
5847 // FIXME: This might not work if the function is extremely large.
5848 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5849 Addr, Op.getOperand(2), JTI);
5850 }
5851 if (isPositionIndependent() || Subtarget->isROPI()) {
5852 Addr =
5853 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5854 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5855 Chain = Addr.getValue(1);
5856 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5857 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5858 } else {
5859 Addr =
5860 DAG.getLoad(PTy, dl, Chain, Addr,
5861 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5862 Chain = Addr.getValue(1);
5863 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5864 }
5865}
5866
5867 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5868 EVT VT = Op.getValueType();
5869 SDLoc dl(Op);
5870
5871 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5872 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5873 return Op;
5874 return DAG.UnrollVectorOp(Op.getNode());
5875 }
5876
5877 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5878
5879 EVT NewTy;
5880 const EVT OpTy = Op.getOperand(0).getValueType();
5881 if (OpTy == MVT::v4f32)
5882 NewTy = MVT::v4i32;
5883 else if (OpTy == MVT::v4f16 && HasFullFP16)
5884 NewTy = MVT::v4i16;
5885 else if (OpTy == MVT::v8f16 && HasFullFP16)
5886 NewTy = MVT::v8i16;
5887 else
5888 llvm_unreachable("Invalid type for custom lowering!");
5889
5890 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5891 return DAG.UnrollVectorOp(Op.getNode());
5892
5893 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5894 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5895}
5896
5897SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5898 EVT VT = Op.getValueType();
5899 if (VT.isVector())
5900 return LowerVectorFP_TO_INT(Op, DAG);
5901
5902 bool IsStrict = Op->isStrictFPOpcode();
5903 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5904
5905 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5906 RTLIB::Libcall LC;
5907 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5908 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5909 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5910 Op.getValueType());
5911 else
5912 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5913 Op.getValueType());
5914 SDLoc Loc(Op);
5915 MakeLibCallOptions CallOptions;
5916 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5917 SDValue Result;
5918 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5919 CallOptions, Loc, Chain);
5920 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5921 }
5922
5923 // FIXME: Remove this when we have strict fp instruction selection patterns
5924 if (IsStrict) {
5925 SDLoc Loc(Op);
5926 SDValue Result =
5927 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5928 : ISD::FP_TO_UINT,
5929 Loc, Op.getValueType(), SrcVal);
5930 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5931 }
5932
5933 return Op;
5934}
5935
5936 static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5937 const ARMSubtarget *Subtarget) {
5938 EVT VT = Op.getValueType();
5939 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5940 EVT FromVT = Op.getOperand(0).getValueType();
5941
5942 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5943 return Op;
5944 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5945 Subtarget->hasFP64())
5946 return Op;
5947 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5948 Subtarget->hasFullFP16())
5949 return Op;
5950 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5951 Subtarget->hasMVEFloatOps())
5952 return Op;
5953 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5954 Subtarget->hasMVEFloatOps())
5955 return Op;
5956
5957 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5958 return SDValue();
5959
5960 SDLoc DL(Op);
5961 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5962 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
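// For example, saturating to a signed i16 lane gives BW == 15, so the result
// is clamped to [-(1 << 15), (1 << 15) - 1] == [-32768, 32767]; the unsigned
// i16 case uses BW == 16 and clamps to [0, 65535].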
5963 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5964 DAG.getValueType(VT.getScalarType()));
5965 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5966 DAG.getConstant((1 << BW) - 1, DL, VT));
5967 if (IsSigned)
5968 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5969 DAG.getConstant(-(1 << BW), DL, VT));
5970 return Max;
5971}
5972
5973 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5974 EVT VT = Op.getValueType();
5975 SDLoc dl(Op);
5976
5977 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5978 if (VT.getVectorElementType() == MVT::f32)
5979 return Op;
5980 return DAG.UnrollVectorOp(Op.getNode());
5981 }
5982
5983 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5984 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5985 "Invalid type for custom lowering!");
5986
5987 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5988
5989 EVT DestVecType;
5990 if (VT == MVT::v4f32)
5991 DestVecType = MVT::v4i32;
5992 else if (VT == MVT::v4f16 && HasFullFP16)
5993 DestVecType = MVT::v4i16;
5994 else if (VT == MVT::v8f16 && HasFullFP16)
5995 DestVecType = MVT::v8i16;
5996 else
5997 return DAG.UnrollVectorOp(Op.getNode());
5998
5999 unsigned CastOpc;
6000 unsigned Opc;
6001 switch (Op.getOpcode()) {
6002 default: llvm_unreachable("Invalid opcode!");
6003 case ISD::SINT_TO_FP:
6004 CastOpc = ISD::SIGN_EXTEND;
6005 Opc = ISD::SINT_TO_FP;
6006 break;
6007 case ISD::UINT_TO_FP:
6008 CastOpc = ISD::ZERO_EXTEND;
6009 Opc = ISD::UINT_TO_FP;
6010 break;
6011 }
6012
6013 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
6014 return DAG.getNode(Opc, dl, VT, Op);
6015}
6016
6017SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
6018 EVT VT = Op.getValueType();
6019 if (VT.isVector())
6020 return LowerVectorINT_TO_FP(Op, DAG);
6021 if (isUnsupportedFloatingType(VT)) {
6022 RTLIB::Libcall LC;
6023 if (Op.getOpcode() == ISD::SINT_TO_FP)
6024 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
6025 Op.getValueType());
6026 else
6027 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6028 Op.getValueType());
6029 MakeLibCallOptions CallOptions;
6030 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6031 CallOptions, SDLoc(Op)).first;
6032 }
6033
6034 return Op;
6035}
6036
6037SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6038 // Implement fcopysign with a fabs and a conditional fneg.
6039 SDValue Tmp0 = Op.getOperand(0);
6040 SDValue Tmp1 = Op.getOperand(1);
6041 SDLoc dl(Op);
6042 EVT VT = Op.getValueType();
6043 EVT SrcVT = Tmp1.getValueType();
6044 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6045 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6046 bool UseNEON = !InGPR && Subtarget->hasNEON();
6047
6048 if (UseNEON) {
6049 // Use VBSL to copy the sign bit.
6050 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6051 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6052 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6053 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6054 if (VT == MVT::f64)
6055 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6056 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6057 DAG.getConstant(32, dl, MVT::i32));
6058 else /*if (VT == MVT::f32)*/
6059 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6060 if (SrcVT == MVT::f32) {
6061 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6062 if (VT == MVT::f64)
6063 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6064 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6065 DAG.getConstant(32, dl, MVT::i32));
6066 } else if (VT == MVT::f32)
6067 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6068 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6069 DAG.getConstant(32, dl, MVT::i32));
6070 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6071 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6072
6073 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6074 dl, MVT::i32);
6075 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6076 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6077 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6078
6079 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6080 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6081 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6082 if (VT == MVT::f32) {
6083 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6084 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6085 DAG.getConstant(0, dl, MVT::i32));
6086 } else {
6087 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6088 }
6089
6090 return Res;
6091 }
6092
6093 // Bitcast operand 1 to i32.
6094 if (SrcVT == MVT::f64)
6095 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6096 Tmp1).getValue(1);
6097 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6098
6099 // Or in the signbit with integer operations.
6100 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6101 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6102 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6103 if (VT == MVT::f32) {
6104 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6105 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6106 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6107 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6108 }
6109
6110 // f64: Or the high part with signbit and then combine two parts.
6111 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6112 Tmp0);
6113 SDValue Lo = Tmp0.getValue(0);
6114 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6115 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6116 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6117}
6118
6119SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6120 MachineFunction &MF = DAG.getMachineFunction();
6121 MachineFrameInfo &MFI = MF.getFrameInfo();
6122 MFI.setReturnAddressIsTaken(true);
6123
6124 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6125 return SDValue();
6126
6127 EVT VT = Op.getValueType();
6128 SDLoc dl(Op);
6129 unsigned Depth = Op.getConstantOperandVal(0);
6130 if (Depth) {
6131 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6132 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6133 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6134 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6135 MachinePointerInfo());
6136 }
6137
6138 // Return LR, which contains the return address. Mark it an implicit live-in.
6139 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6140 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6141}
6142
6143SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6144 const ARMBaseRegisterInfo &ARI =
6145 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6146 MachineFunction &MF = DAG.getMachineFunction();
6147 MachineFrameInfo &MFI = MF.getFrameInfo();
6148 MFI.setFrameAddressIsTaken(true);
6149
6150 EVT VT = Op.getValueType();
6151 SDLoc dl(Op); // FIXME probably not meaningful
6152 unsigned Depth = Op.getConstantOperandVal(0);
6153 Register FrameReg = ARI.getFrameRegister(MF);
6154 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6155 while (Depth--)
6156 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6157 MachinePointerInfo());
6158 return FrameAddr;
6159}
6160
6161// FIXME? Maybe this could be a TableGen attribute on some registers and
6162// this table could be generated automatically from RegInfo.
6163Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6164 const MachineFunction &MF) const {
6165 Register Reg = StringSwitch<unsigned>(RegName)
6166 .Case("sp", ARM::SP)
6167 .Default(0);
6168 if (Reg)
6169 return Reg;
6170 report_fatal_error(Twine("Invalid register name \""
6171 + StringRef(RegName) + "\"."));
6172}
6173
6174 // The result is a 64-bit value, so split it into two 32-bit values and return
6175 // them as a pair of values.
6176 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6177 SelectionDAG &DAG) {
6178 SDLoc DL(N);
6179
6180 // This function is only supposed to be called for i64 type destination.
6181 assert(N->getValueType(0) == MVT::i64
6182 && "ExpandREAD_REGISTER called for non-i64 type result.");
6183
6184 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6185 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6186 N->getOperand(0),
6187 N->getOperand(1));
6188
6189 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6190 Read.getValue(1)));
6191 Results.push_back(Read.getOperand(0));
6192}
6193
6194/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6195/// When \p DstVT, the destination type of \p BC, is on the vector
6196/// register bank and the source of bitcast, \p Op, operates on the same bank,
6197/// it might be possible to combine them, such that everything stays on the
6198/// vector register bank.
6199 /// \return The node that would replace \p BC, if the combine
6200 /// is possible.
6201 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6202 SelectionDAG &DAG) {
6203 SDValue Op = BC->getOperand(0);
6204 EVT DstVT = BC->getValueType(0);
6205
6206 // The only vector instruction that can produce a scalar (remember,
6207 // since the bitcast was about to be turned into VMOVDRR, the source
6208 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6209 // Moreover, we can do this combine only if there is one use.
6210 // Finally, if the destination type is not a vector, there is not
6211 // much point on forcing everything on the vector bank.
6212 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6213 !Op.hasOneUse())
6214 return SDValue();
6215
6216 // If the index is not constant, we will introduce an additional
6217 // multiply that will stick.
6218 // Give up in that case.
6219 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6220 if (!Index)
6221 return SDValue();
6222 unsigned DstNumElt = DstVT.getVectorNumElements();
6223
6224 // Compute the new index.
6225 const APInt &APIntIndex = Index->getAPIntValue();
6226 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6227 NewIndex *= APIntIndex;
6228 // Check if the new constant index fits into i32.
6229 if (NewIndex.getBitWidth() > 32)
6230 return SDValue();
6231
6232 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6233 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6234 SDLoc dl(Op);
6235 SDValue ExtractSrc = Op.getOperand(0);
6236 EVT VecVT = EVT::getVectorVT(
6237 *DAG.getContext(), DstVT.getScalarType(),
6238 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6239 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6240 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6241 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6242}
6243
6244/// ExpandBITCAST - If the target supports VFP, this function is called to
6245/// expand a bit convert where either the source or destination type is i64 to
6246/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6247/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6248/// vectors), since the legalizer won't know what to do with that.
6249SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6250 const ARMSubtarget *Subtarget) const {
6251 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6252 SDLoc dl(N);
6253 SDValue Op = N->getOperand(0);
6254
6255 // This function is only supposed to be called for i16 and i64 types, either
6256 // as the source or destination of the bit convert.
6257 EVT SrcVT = Op.getValueType();
6258 EVT DstVT = N->getValueType(0);
6259
6260 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6261 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6262 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6263 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6264
6265 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6266 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6267 return DAG.getNode(
6268 ISD::TRUNCATE, SDLoc(N), DstVT,
6269 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6270
6271 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6272 return SDValue();
6273
6274 // Turn i64->f64 into VMOVDRR.
6275 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6276 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6277 // if we can combine the bitcast with its source.
6278 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6279 return Val;
6280 SDValue Lo, Hi;
6281 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6282 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6283 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6284 }
6285
6286 // Turn f64->i64 into VMOVRRD.
6287 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6288 SDValue Cvt;
6289 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6290 SrcVT.getVectorNumElements() > 1)
6291 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6292 DAG.getVTList(MVT::i32, MVT::i32),
6293 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6294 else
6295 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6296 DAG.getVTList(MVT::i32, MVT::i32), Op);
6297 // Merge the pieces into a single i64 value.
6298 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6299 }
6300
6301 return SDValue();
6302}
6303
6304/// getZeroVector - Returns a vector of specified type with all zero elements.
6305/// Zero vectors are used to represent vector negation and in those cases
6306/// will be implemented with the NEON VNEG instruction. However, VNEG does
6307/// not support i64 elements, so sometimes the zero vectors will need to be
6308/// explicitly constructed. Regardless, use a canonical VMOV to create the
6309/// zero vector.
6310static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6311 assert(VT.isVector() && "Expected a vector type");
6312 // The canonical modified immediate encoding of a zero vector is....0!
6313 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6314 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6315 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6316 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6317}
6318
6319 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6320 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
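// Roughly, for a shift amount ShAmt < 32 this computes
//   Lo = (ShOpLo >> ShAmt) | (ShOpHi << (32 - ShAmt)),  Hi = ShOpHi >> ShAmt;
// for ShAmt >= 32 the CMOVs below select
//   Lo = ShOpHi >> (ShAmt - 32),  Hi = 0 (or the sign of ShOpHi for SRA).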
6321SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6322 SelectionDAG &DAG) const {
6323 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6324 EVT VT = Op.getValueType();
6325 unsigned VTBits = VT.getSizeInBits();
6326 SDLoc dl(Op);
6327 SDValue ShOpLo = Op.getOperand(0);
6328 SDValue ShOpHi = Op.getOperand(1);
6329 SDValue ShAmt = Op.getOperand(2);
6330 SDValue ARMcc;
6331 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6332 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6333
6334 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6335
6336 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6337 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6338 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6339 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6340 DAG.getConstant(VTBits, dl, MVT::i32));
6341 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6342 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6343 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6344 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6345 ISD::SETGE, ARMcc, DAG, dl);
6346 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6347 ARMcc, CCR, CmpLo);
6348
6349 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6350 SDValue HiBigShift = Opc == ISD::SRA
6351 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6352 DAG.getConstant(VTBits - 1, dl, VT))
6353 : DAG.getConstant(0, dl, VT);
6354 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6355 ISD::SETGE, ARMcc, DAG, dl);
6356 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6357 ARMcc, CCR, CmpHi);
6358
6359 SDValue Ops[2] = { Lo, Hi };
6360 return DAG.getMergeValues(Ops, dl);
6361}
6362
6363 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6364 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6365SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6366 SelectionDAG &DAG) const {
6367 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6368 EVT VT = Op.getValueType();
6369 unsigned VTBits = VT.getSizeInBits();
6370 SDLoc dl(Op);
6371 SDValue ShOpLo = Op.getOperand(0);
6372 SDValue ShOpHi = Op.getOperand(1);
6373 SDValue ShAmt = Op.getOperand(2);
6374 SDValue ARMcc;
6375 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6376
6377 assert(Op.getOpcode() == ISD::SHL_PARTS);
6378 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6379 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6380 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6381 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6382 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6383
6384 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6385 DAG.getConstant(VTBits, dl, MVT::i32));
6386 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6387 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6388 ISD::SETGE, ARMcc, DAG, dl);
6389 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6390 ARMcc, CCR, CmpHi);
6391
6392 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6393 ISD::SETGE, ARMcc, DAG, dl);
6394 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6395 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6396 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6397
6398 SDValue Ops[2] = { Lo, Hi };
6399 return DAG.getMergeValues(Ops, dl);
6400}
6401
6402SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6403 SelectionDAG &DAG) const {
6404 // The rounding mode is in bits 23:22 of the FPSCR.
6405 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
6406 // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
6407 // so that the shift and the mask get folded into a bitfield extract.
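// For example, FPSCR[23:22] == 0b11 (round toward zero) yields
// ((3 + 1) & 3) == 0, the FLT_ROUNDS value for round-toward-zero.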
6408 SDLoc dl(Op);
6409 SDValue Chain = Op.getOperand(0);
6410 SDValue Ops[] = {Chain,
6411 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6412
6413 SDValue FPSCR =
6414 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6415 Chain = FPSCR.getValue(1);
6416 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6417 DAG.getConstant(1U << 22, dl, MVT::i32));
6418 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6419 DAG.getConstant(22, dl, MVT::i32));
6420 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6421 DAG.getConstant(3, dl, MVT::i32));
6422 return DAG.getMergeValues({And, Chain}, dl);
6423}
6424
6425SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6426 SelectionDAG &DAG) const {
6427 SDLoc DL(Op);
6428 SDValue Chain = Op->getOperand(0);
6429 SDValue RMValue = Op->getOperand(1);
6430
6431 // The rounding mode is in bits 23:22 of the FPSCR.
6432 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6433 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6434 // (((arg - 1) & 3) << 22).
6435 //
6436 // It is expected that the argument of llvm.set.rounding is within the
6437 // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6438 // responsibility of the code that generates llvm.set.rounding to ensure this
6439 // condition.
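//
// For example, llvm.set.rounding(2) (round toward +infinity) computes
// ((2 - 1) & 3) << 22 == 1 << 22, i.e. FPSCR[23:22] = 0b01 (RP).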
6440
6441 // Calculate new value of FPSCR[23:22].
6442 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6443 DAG.getConstant(1, DL, MVT::i32));
6444 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6445 DAG.getConstant(0x3, DL, MVT::i32));
6446 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6447 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6448
6449 // Get current value of FPSCR.
6450 SDValue Ops[] = {Chain,
6451 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6452 SDValue FPSCR =
6453 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6454 Chain = FPSCR.getValue(1);
6455 FPSCR = FPSCR.getValue(0);
6456
6457 // Put new rounding mode into FPSCR[23:22].
6458 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6459 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6460 DAG.getConstant(RMMask, DL, MVT::i32));
6461 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6462 SDValue Ops2[] = {
6463 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6464 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6465}
6466
6467SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6468 SelectionDAG &DAG) const {
6469 SDLoc DL(Op);
6470 SDValue Chain = Op->getOperand(0);
6471 SDValue Mode = Op->getOperand(1);
6472
6473 // Generate nodes to build:
6474 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6475 SDValue Ops[] = {Chain,
6476 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6477 SDValue FPSCR =
6478 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6479 Chain = FPSCR.getValue(1);
6480 FPSCR = FPSCR.getValue(0);
6481
6482 SDValue FPSCRMasked =
6483 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6484 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6485 SDValue InputMasked =
6486 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6487 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6488 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6489
6490 SDValue Ops2[] = {
6491 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6492 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6493}
6494
6495SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6496 SelectionDAG &DAG) const {
6497 SDLoc DL(Op);
6498 SDValue Chain = Op->getOperand(0);
6499
6500 // To get the default FP mode all control bits are cleared:
6501 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6502 SDValue Ops[] = {Chain,
6503 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6504 SDValue FPSCR =
6505 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6506 Chain = FPSCR.getValue(1);
6507 FPSCR = FPSCR.getValue(0);
6508
6509 SDValue FPSCRMasked = DAG.getNode(
6510 ISD::AND, DL, MVT::i32, FPSCR,
6511 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6512 SDValue Ops2[] = {Chain,
6513 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6514 FPSCRMasked};
6515 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6516}
6517
6518 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6519 const ARMSubtarget *ST) {
6520 SDLoc dl(N);
6521 EVT VT = N->getValueType(0);
6522 if (VT.isVector() && ST->hasNEON()) {
6523
6524 // Compute the least significant set bit: LSB = X & -X
6525 SDValue X = N->getOperand(0);
6526 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6527 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
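// For example, X = 0b0100'1000 gives LSB = 0b0000'1000, and
// ctpop(LSB - 1) = ctpop(0b0000'0111) = 3 = cttz(X).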
6528
6529 EVT ElemTy = VT.getVectorElementType();
6530
6531 if (ElemTy == MVT::i8) {
6532 // Compute with: cttz(x) = ctpop(lsb - 1)
6533 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6534 DAG.getTargetConstant(1, dl, ElemTy));
6535 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6536 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6537 }
6538
6539 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6540 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6541 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6542 unsigned NumBits = ElemTy.getSizeInBits();
6543 SDValue WidthMinus1 =
6544 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6545 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6546 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6547 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6548 }
6549
6550 // Compute with: cttz(x) = ctpop(lsb - 1)
6551
6552 // Compute LSB - 1.
6553 SDValue Bits;
6554 if (ElemTy == MVT::i64) {
6555 // Load constant 0xffff'ffff'ffff'ffff to register.
6556 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6557 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6558 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6559 } else {
6560 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6561 DAG.getTargetConstant(1, dl, ElemTy));
6562 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6563 }
6564 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6565 }
6566
6567 if (!ST->hasV6T2Ops())
6568 return SDValue();
6569
6570 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6571 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6572}
6573
6574 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6575 const ARMSubtarget *ST) {
6576 EVT VT = N->getValueType(0);
6577 SDLoc DL(N);
6578
6579 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6580 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6581 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6582 "Unexpected type for custom ctpop lowering");
6583
6584 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6585 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6586 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6587 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6588
6589 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
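// For example, for a v4i32 result: ctpop the value as v16i8, then
// vpaddl.u8 accumulates the byte counts into v8i16 and vpaddl.u16 into v4i32.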
6590 unsigned EltSize = 8;
6591 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6592 while (EltSize != VT.getScalarSizeInBits()) {
6593 SmallVector<SDValue, 8> Ops;
6594 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6595 TLI.getPointerTy(DAG.getDataLayout())));
6596 Ops.push_back(Res);
6597
6598 EltSize *= 2;
6599 NumElts /= 2;
6600 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6601 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6602 }
6603
6604 return Res;
6605}
6606
6607 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6608/// operand of a vector shift operation, where all the elements of the
6609/// build_vector must have the same constant integer value.
6610static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6611 // Ignore bit_converts.
6612 while (Op.getOpcode() == ISD::BITCAST)
6613 Op = Op.getOperand(0);
6614 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6615 APInt SplatBits, SplatUndef;
6616 unsigned SplatBitSize;
6617 bool HasAnyUndefs;
6618 if (!BVN ||
6619 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6620 ElementBits) ||
6621 SplatBitSize > ElementBits)
6622 return false;
6623 Cnt = SplatBits.getSExtValue();
6624 return true;
6625}
6626
6627/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6628/// operand of a vector shift left operation. That value must be in the range:
6629/// 0 <= Value < ElementBits for a left shift; or
6630/// 0 <= Value <= ElementBits for a long left shift.
6631static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6632 assert(VT.isVector() && "vector shift count is not a vector type");
6633 int64_t ElementBits = VT.getScalarSizeInBits();
6634 if (!getVShiftImm(Op, ElementBits, Cnt))
6635 return false;
6636 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6637}
6638
6639/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6640/// operand of a vector shift right operation. For a shift opcode, the value
6641 /// is positive, but for an intrinsic the count must be negative. The
6642/// absolute value must be in the range:
6643/// 1 <= |Value| <= ElementBits for a right shift; or
6644/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6645static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6646 int64_t &Cnt) {
6647 assert(VT.isVector() && "vector shift count is not a vector type");
6648 int64_t ElementBits = VT.getScalarSizeInBits();
6649 if (!getVShiftImm(Op, ElementBits, Cnt))
6650 return false;
6651 if (!isIntrinsic)
6652 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6653 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6654 Cnt = -Cnt;
6655 return true;
6656 }
6657 return false;
6658}
6659
6660 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6661 const ARMSubtarget *ST) {
6662 EVT VT = N->getValueType(0);
6663 SDLoc dl(N);
6664 int64_t Cnt;
6665
6666 if (!VT.isVector())
6667 return SDValue();
6668
6669 // We essentially have two forms here: shift by an immediate and shift by a
6670 // vector register (there is also a shift by a GPR, but that is just handled
6671 // with a tablegen pattern). We cannot easily match shift by an immediate in
6672 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6673 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6674 // signed or unsigned, and a negative shift indicates a shift right).
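// For example, an SRL by a non-immediate amount N is emitted as
// VSHLu(X, 0 - N), i.e. an unsigned VSHL by the negated shift count.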
6675 if (N->getOpcode() == ISD::SHL) {
6676 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6677 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6678 DAG.getConstant(Cnt, dl, MVT::i32));
6679 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6680 N->getOperand(1));
6681 }
6682
6683 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6684 "unexpected vector shift opcode");
6685
6686 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6687 unsigned VShiftOpc =
6688 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6689 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6690 DAG.getConstant(Cnt, dl, MVT::i32));
6691 }
6692
6693 // Other right shifts we don't have operations for (we use a shift left by a
6694 // negative number).
6695 EVT ShiftVT = N->getOperand(1).getValueType();
6696 SDValue NegatedCount = DAG.getNode(
6697 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6698 unsigned VShiftOpc =
6699 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6700 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6701}
6702
6703 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6704 const ARMSubtarget *ST) {
6705 EVT VT = N->getValueType(0);
6706 SDLoc dl(N);
6707
6708 // We can get here for a node like i32 = ISD::SHL i32, i64
6709 if (VT != MVT::i64)
6710 return SDValue();
6711
6712 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6713 N->getOpcode() == ISD::SHL) &&
6714 "Unknown shift to lower!");
6715
6716 unsigned ShOpc = N->getOpcode();
6717 if (ST->hasMVEIntegerOps()) {
6718 SDValue ShAmt = N->getOperand(1);
6719 unsigned ShPartsOpc = ARMISD::LSLL;
6720 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6721
6722 // If the shift amount is zero or at least 32, or a non-constant shift amount
6723 // is wider than 64 bits, then do the default optimisation
6724 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6725 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6726 return SDValue();
6727
6728 // Extract the lower 32 bits of the shift amount if it's not an i32
6729 if (ShAmt->getValueType(0) != MVT::i32)
6730 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6731
6732 if (ShOpc == ISD::SRL) {
6733 if (!Con)
6734 // There is no t2LSRLr instruction so negate and perform an lsll if the
6735 // shift amount is in a register, emulating a right shift.
6736 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6737 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6738 else
6739 // Else generate an lsrl on the immediate shift amount
6740 ShPartsOpc = ARMISD::LSRL;
6741 } else if (ShOpc == ISD::SRA)
6742 ShPartsOpc = ARMISD::ASRL;
6743
6744 // Split Lower/Upper 32 bits of the destination/source
6745 SDValue Lo, Hi;
6746 std::tie(Lo, Hi) =
6747 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6748 // Generate the shift operation as computed above
6749 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6750 ShAmt);
6751 // The upper 32 bits come from the second return value of lsll
6752 Hi = SDValue(Lo.getNode(), 1);
6753 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6754 }
6755
6756 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6757 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6758 return SDValue();
6759
6760 // If we are in thumb mode, we don't have RRX.
6761 if (ST->isThumb1Only())
6762 return SDValue();
6763
6764 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
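// For example, for lsr i64 %x, 1: SRL_GLUE shifts the high word right by one
// and leaves the bit shifted out in the carry flag; RRX then shifts the low
// word right by one, inserting that carry into bit 31:
//   Lo' = (carry << 31) | (Lo >> 1).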
6765 SDValue Lo, Hi;
6766 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6767
6768 // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
6769 // captures the result into a carry flag.
6770 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6771 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6772
6773 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6774 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6775
6776 // Merge the pieces into a single i64 value.
6777 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6778}
6779
6780 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6781 const ARMSubtarget *ST) {
6782 bool Invert = false;
6783 bool Swap = false;
6784 unsigned Opc = ARMCC::AL;
6785
6786 SDValue Op0 = Op.getOperand(0);
6787 SDValue Op1 = Op.getOperand(1);
6788 SDValue CC = Op.getOperand(2);
6789 EVT VT = Op.getValueType();
6790 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6791 SDLoc dl(Op);
6792
6793 EVT CmpVT;
6794 if (ST->hasNEON())
6795 CmpVT = VT.changeVectorElementTypeToInteger();
6796 else {
6797 assert(ST->hasMVEIntegerOps() &&
6798 "No hardware support for integer vector comparison!");
6799
6800 if (Op.getValueType().getVectorElementType() != MVT::i1)
6801 return SDValue();
6802
6803 // Make sure we expand floating point setcc to scalar if we do not have
6804 // mve.fp, so that we can handle them from there.
6805 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6806 return SDValue();
6807
6808 CmpVT = VT;
6809 }
6810
6811 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6812 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6813 // Special-case integer 64-bit equality comparisons. They aren't legal,
6814 // but they can be lowered with a few vector instructions.
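// For example, a v2i64 SETEQ is done as a v4i32 SETEQ; VREV64 swaps the two
// i32 halves within each i64 lane, so ANDing the mask with its swapped copy
// leaves all-ones in a lane only if both halves compared equal.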
6815 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6816 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6817 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6818 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6819 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6820 DAG.getCondCode(ISD::SETEQ));
6821 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6822 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6823 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6824 if (SetCCOpcode == ISD::SETNE)
6825 Merged = DAG.getNOT(dl, Merged, CmpVT);
6826 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6827 return Merged;
6828 }
6829
6830 if (CmpVT.getVectorElementType() == MVT::i64)
6831 // 64-bit comparisons are not legal in general.
6832 return SDValue();
6833
6834 if (Op1.getValueType().isFloatingPoint()) {
6835 switch (SetCCOpcode) {
6836 default: llvm_unreachable("Illegal FP comparison");
6837 case ISD::SETUNE:
6838 case ISD::SETNE:
6839 if (ST->hasMVEFloatOps()) {
6840 Opc = ARMCC::NE; break;
6841 } else {
6842 Invert = true; [[fallthrough]];
6843 }
6844 case ISD::SETOEQ:
6845 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6846 case ISD::SETOLT:
6847 case ISD::SETLT: Swap = true; [[fallthrough]];
6848 case ISD::SETOGT:
6849 case ISD::SETGT: Opc = ARMCC::GT; break;
6850 case ISD::SETOLE:
6851 case ISD::SETLE: Swap = true; [[fallthrough]];
6852 case ISD::SETOGE:
6853 case ISD::SETGE: Opc = ARMCC::GE; break;
6854 case ISD::SETUGE: Swap = true; [[fallthrough]];
6855 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6856 case ISD::SETUGT: Swap = true; [[fallthrough]];
6857 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6858 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6859 case ISD::SETONE: {
6860 // Expand this to (OLT | OGT).
6861 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6862 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6863 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6864 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6865 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6866 if (Invert)
6867 Result = DAG.getNOT(dl, Result, VT);
6868 return Result;
6869 }
6870 case ISD::SETUO: Invert = true; [[fallthrough]];
6871 case ISD::SETO: {
6872 // Expand this to (OLT | OGE).
6873 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6874 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6875 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6876 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6877 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6878 if (Invert)
6879 Result = DAG.getNOT(dl, Result, VT);
6880 return Result;
6881 }
6882 }
6883 } else {
6884 // Integer comparisons.
6885 switch (SetCCOpcode) {
6886 default: llvm_unreachable("Illegal integer comparison");
6887 case ISD::SETNE:
6888 if (ST->hasMVEIntegerOps()) {
6889 Opc = ARMCC::NE; break;
6890 } else {
6891 Invert = true; [[fallthrough]];
6892 }
6893 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6894 case ISD::SETLT: Swap = true; [[fallthrough]];
6895 case ISD::SETGT: Opc = ARMCC::GT; break;
6896 case ISD::SETLE: Swap = true; [[fallthrough]];
6897 case ISD::SETGE: Opc = ARMCC::GE; break;
6898 case ISD::SETULT: Swap = true; [[fallthrough]];
6899 case ISD::SETUGT: Opc = ARMCC::HI; break;
6900 case ISD::SETULE: Swap = true; [[fallthrough]];
6901 case ISD::SETUGE: Opc = ARMCC::HS; break;
6902 }
6903
6904 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6905 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6906 SDValue AndOp;
6907 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6908 AndOp = Op0;
6909 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6910 AndOp = Op1;
6911
6912 // Ignore bitconvert.
6913 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6914 AndOp = AndOp.getOperand(0);
6915
6916 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6917 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6918 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6919 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6920 if (!Invert)
6921 Result = DAG.getNOT(dl, Result, VT);
6922 return Result;
6923 }
6924 }
6925 }
6926
6927 if (Swap)
6928 std::swap(Op0, Op1);
6929
6930 // If one of the operands is a constant vector zero, attempt to fold the
6931 // comparison to a specialized compare-against-zero form.
6932 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6933 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6934 Opc == ARMCC::NE)) {
6935 if (Opc == ARMCC::GE)
6936 Opc = ARMCC::LE;
6937 else if (Opc == ARMCC::GT)
6938 Opc = ARMCC::LT;
6939 std::swap(Op0, Op1);
6940 }
6941
6942 SDValue Result;
6943 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6944 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6945 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6946 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6947 DAG.getConstant(Opc, dl, MVT::i32));
6948 else
6949 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6950 DAG.getConstant(Opc, dl, MVT::i32));
6951
6952 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6953
6954 if (Invert)
6955 Result = DAG.getNOT(dl, Result, VT);
6956
6957 return Result;
6958}
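// Illustrative sketch (editorial addition, not in the upstream source): for the
// 64-bit equality special case above, a v2i64 SETEQ becomes
//   Cmp      = SETCC (bitcast v4i32 Op0), (bitcast v4i32 Op1), SETEQ
//   Reversed = VREV64 Cmp        ; swap the two 32-bit halves of each 64-bit lane
//   Merged   = AND Cmp, Reversed ; a lane is all-ones only if both halves match
// followed by a bitcast and sign-extend/truncate back to the result type (with a
// NOT for SETNE).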
6959
6960static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6961 SDValue LHS = Op.getOperand(0);
6962 SDValue RHS = Op.getOperand(1);
6963 SDValue Carry = Op.getOperand(2);
6964 SDValue Cond = Op.getOperand(3);
6965 SDLoc DL(Op);
6966
6967 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6968
6969 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
6970 // have to invert the carry first.
6971 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6972 DAG.getConstant(1, DL, MVT::i32), Carry);
6973 // This converts the boolean value carry into the carry flag.
6974 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6975
6976 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6977 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6978
6979 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6980 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6981 SDValue ARMcc = DAG.getConstant(
6982 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6983 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6984 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6985 Cmp.getValue(1), SDValue());
6986 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6987 CCR, Chain.getValue(1));
6988}
6989
6990/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6991/// valid vector constant for a NEON or MVE instruction with a "modified
6992/// immediate" operand (e.g., VMOV). If so, return the encoded value.
6993static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6994 unsigned SplatBitSize, SelectionDAG &DAG,
6995 const SDLoc &dl, EVT &VT, EVT VectorVT,
6996 VMOVModImmType type) {
6997 unsigned OpCmode, Imm;
6998 bool is128Bits = VectorVT.is128BitVector();
6999
7000 // SplatBitSize is set to the smallest size that splats the vector, so a
7001 // zero vector will always have SplatBitSize == 8. However, NEON modified
7002 // immediate instructions other than VMOV do not support the 8-bit encoding
7003 // of a zero vector, and the default encoding of zero is supposed to be the
7004 // 32-bit version.
7005 if (SplatBits == 0)
7006 SplatBitSize = 32;
7007
7008 switch (SplatBitSize) {
7009 case 8:
7010 if (type != VMOVModImm)
7011 return SDValue();
7012 // Any 1-byte value is OK. Op=0, Cmode=1110.
7013 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
7014 OpCmode = 0xe;
7015 Imm = SplatBits;
7016 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
7017 break;
7018
7019 case 16:
7020 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
7021 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
7022 if ((SplatBits & ~0xff) == 0) {
7023 // Value = 0x00nn: Op=x, Cmode=100x.
7024 OpCmode = 0x8;
7025 Imm = SplatBits;
7026 break;
7027 }
7028 if ((SplatBits & ~0xff00) == 0) {
7029 // Value = 0xnn00: Op=x, Cmode=101x.
7030 OpCmode = 0xa;
7031 Imm = SplatBits >> 8;
7032 break;
7033 }
7034 return SDValue();
7035
7036 case 32:
7037 // NEON's 32-bit VMOV supports splat values where:
7038 // * only one byte is nonzero, or
7039 // * the least significant byte is 0xff and the second byte is nonzero, or
7040 // * the least significant 2 bytes are 0xff and the third is nonzero.
7041 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7042 if ((SplatBits & ~0xff) == 0) {
7043 // Value = 0x000000nn: Op=x, Cmode=000x.
7044 OpCmode = 0;
7045 Imm = SplatBits;
7046 break;
7047 }
7048 if ((SplatBits & ~0xff00) == 0) {
7049 // Value = 0x0000nn00: Op=x, Cmode=001x.
7050 OpCmode = 0x2;
7051 Imm = SplatBits >> 8;
7052 break;
7053 }
7054 if ((SplatBits & ~0xff0000) == 0) {
7055 // Value = 0x00nn0000: Op=x, Cmode=010x.
7056 OpCmode = 0x4;
7057 Imm = SplatBits >> 16;
7058 break;
7059 }
7060 if ((SplatBits & ~0xff000000) == 0) {
7061 // Value = 0xnn000000: Op=x, Cmode=011x.
7062 OpCmode = 0x6;
7063 Imm = SplatBits >> 24;
7064 break;
7065 }
7066
7067 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7068 if (type == OtherModImm) return SDValue();
7069
7070 if ((SplatBits & ~0xffff) == 0 &&
7071 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7072 // Value = 0x0000nnff: Op=x, Cmode=1100.
7073 OpCmode = 0xc;
7074 Imm = SplatBits >> 8;
7075 break;
7076 }
7077
7078 // cmode == 0b1101 is not supported for MVE VMVN
7079 if (type == MVEVMVNModImm)
7080 return SDValue();
7081
7082 if ((SplatBits & ~0xffffff) == 0 &&
7083 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7084 // Value = 0x00nnffff: Op=x, Cmode=1101.
7085 OpCmode = 0xd;
7086 Imm = SplatBits >> 16;
7087 break;
7088 }
7089
7090 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7091 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7092 // VMOV.I32. A (very) minor optimization would be to replicate the value
7093 // and fall through here to test for a valid 64-bit splat. But, then the
7094 // caller would also need to check and handle the change in size.
7095 return SDValue();
7096
7097 case 64: {
7098 if (type != VMOVModImm)
7099 return SDValue();
7100 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7101 uint64_t BitMask = 0xff;
7102 unsigned ImmMask = 1;
7103 Imm = 0;
7104 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7105 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7106 Imm |= ImmMask;
7107 } else if ((SplatBits & BitMask) != 0) {
7108 return SDValue();
7109 }
7110 BitMask <<= 8;
7111 ImmMask <<= 1;
7112 }
7113
7114 if (DAG.getDataLayout().isBigEndian()) {
7115 // Reverse the order of elements within the vector.
7116 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7117 unsigned Mask = (1 << BytesPerElem) - 1;
7118 unsigned NumElems = 8 / BytesPerElem;
7119 unsigned NewImm = 0;
7120 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7121 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7122 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7123 }
7124 Imm = NewImm;
7125 }
7126
7127 // Op=1, Cmode=1110.
7128 OpCmode = 0x1e;
7129 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7130 break;
7131 }
7132
7133 default:
7134 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7135 }
7136
7137 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7138 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7139}
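// Worked example (editorial addition, not in the upstream source): a 32-bit
// splat of 0x00nn0000 is encoded above as Op/Cmode = 0x4 with Imm = nn, while
// the 64-bit "every byte is 0x00 or 0xff" form sets one Imm bit per 0xff byte,
// e.g. 0x00ff00ff00ff00ff -> Imm = 0b01010101 with Op/Cmode = 0x1e.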
7140
7141SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7142 const ARMSubtarget *ST) const {
7143 EVT VT = Op.getValueType();
7144 bool IsDouble = (VT == MVT::f64);
7145 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7146 const APFloat &FPVal = CFP->getValueAPF();
7147
7148 // Prevent floating-point constants from using literal loads
7149 // when execute-only is enabled.
7150 if (ST->genExecuteOnly()) {
7151 // We shouldn't trigger this for v6m execute-only
7152 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7153 "Unexpected architecture");
7154
7155 // If we can represent the constant as an immediate, don't lower it
7156 if (isFPImmLegal(FPVal, VT))
7157 return Op;
7158 // Otherwise, construct as integer, and move to float register
7159 APInt INTVal = FPVal.bitcastToAPInt();
7160 SDLoc DL(CFP);
7161 switch (VT.getSimpleVT().SimpleTy) {
7162 default:
7163 llvm_unreachable("Unknown floating point type!");
7164 break;
7165 case MVT::f64: {
7166 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7167 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7168 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7169 }
7170 case MVT::f32:
7171 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7172 DAG.getConstant(INTVal, DL, MVT::i32));
7173 }
7174 }
7175
7176 if (!ST->hasVFP3Base())
7177 return SDValue();
7178
7179 // Use the default (constant pool) lowering for double constants when we have
7180 // an SP-only FPU
7181 if (IsDouble && !Subtarget->hasFP64())
7182 return SDValue();
7183
7184 // Try splatting with a VMOV.f32...
7185 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7186
7187 if (ImmVal != -1) {
7188 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7189 // We have code in place to select a valid ConstantFP already, no need to
7190 // do any mangling.
7191 return Op;
7192 }
7193
7194 // It's a float and we are trying to use NEON operations where
7195 // possible. Lower it to a splat followed by an extract.
7196 SDLoc DL(Op);
7197 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7198 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7199 NewVal);
7200 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7201 DAG.getConstant(0, DL, MVT::i32));
7202 }
7203
7204 // The rest of our options are NEON-only; make sure that's allowed before
7205 // proceeding.
7206 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7207 return SDValue();
7208
7209 EVT VMovVT;
7210 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7211
7212 // It wouldn't really be worth bothering for doubles except for one very
7213 // important value, which does happen to match: 0.0. So make sure we don't do
7214 // anything stupid.
7215 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7216 return SDValue();
7217
7218 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7219 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7220 VMovVT, VT, VMOVModImm);
7221 if (NewVal != SDValue()) {
7222 SDLoc DL(Op);
7223 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7224 NewVal);
7225 if (IsDouble)
7226 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7227
7228 // It's a float: cast and extract a vector element.
7229 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7230 VecConstant);
7231 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7232 DAG.getConstant(0, DL, MVT::i32));
7233 }
7234
7235 // Finally, try a VMVN.i32
7236 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7237 VT, VMVNModImm);
7238 if (NewVal != SDValue()) {
7239 SDLoc DL(Op);
7240 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7241
7242 if (IsDouble)
7243 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7244
7245 // It's a float: cast and extract a vector element.
7246 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7247 VecConstant);
7248 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7249 DAG.getConstant(0, DL, MVT::i32));
7250 }
7251
7252 return SDValue();
7253}
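// Illustrative note (editorial addition, not in the upstream source): a constant
// such as 1.0f or -0.5f fits the 8-bit VFP immediate encoding and is either kept
// as a plain ConstantFP or, when NEON is preferred for single precision, rebuilt
// as a VMOVFPIMM splat plus an extract of lane 0; values that do not fit fall
// back to the VMOV.i32/VMVN.i32 forms above or, failing that, to the default
// constant-pool load.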
7254
7255// Check if a VEXT instruction can handle the shuffle mask when the
7256// vector sources of the shuffle are the same.
7257static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7258 unsigned NumElts = VT.getVectorNumElements();
7259
7260 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7261 if (M[0] < 0)
7262 return false;
7263
7264 Imm = M[0];
7265
7266 // If this is a VEXT shuffle, the immediate value is the index of the first
7267 // element. The other shuffle indices must be the successive elements after
7268 // the first one.
7269 unsigned ExpectedElt = Imm;
7270 for (unsigned i = 1; i < NumElts; ++i) {
7271 // Increment the expected index. If it wraps around, just follow it
7272 // back to index zero and keep going.
7273 ++ExpectedElt;
7274 if (ExpectedElt == NumElts)
7275 ExpectedElt = 0;
7276
7277 if (M[i] < 0) continue; // ignore UNDEF indices
7278 if (ExpectedElt != static_cast<unsigned>(M[i]))
7279 return false;
7280 }
7281
7282 return true;
7283}
7284
7285static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7286 bool &ReverseVEXT, unsigned &Imm) {
7287 unsigned NumElts = VT.getVectorNumElements();
7288 ReverseVEXT = false;
7289
7290 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7291 if (M[0] < 0)
7292 return false;
7293
7294 Imm = M[0];
7295
7296 // If this is a VEXT shuffle, the immediate value is the index of the first
7297 // element. The other shuffle indices must be the successive elements after
7298 // the first one.
7299 unsigned ExpectedElt = Imm;
7300 for (unsigned i = 1; i < NumElts; ++i) {
7301 // Increment the expected index. If it wraps around, it may still be
7302 // a VEXT but the source vectors must be swapped.
7303 ExpectedElt += 1;
7304 if (ExpectedElt == NumElts * 2) {
7305 ExpectedElt = 0;
7306 ReverseVEXT = true;
7307 }
7308
7309 if (M[i] < 0) continue; // ignore UNDEF indices
7310 if (ExpectedElt != static_cast<unsigned>(M[i]))
7311 return false;
7312 }
7313
7314 // Adjust the index value if the source operands will be swapped.
7315 if (ReverseVEXT)
7316 Imm -= NumElts;
7317
7318 return true;
7319}
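// Worked example (editorial addition, not in the upstream source): for a v8i8
// shuffle of v1,v2 the mask <3,4,5,6,7,8,9,10> is accepted with Imm = 3 (a
// plain VEXT #3), while <11,12,13,14,15,0,1,2> wraps past the end of the
// concatenation, so ReverseVEXT is set and Imm becomes 11 - 8 = 3 once the
// sources are swapped.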
7320
7321static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7322 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7323 // range, then 0 is placed into the resulting vector. So pretty much any mask
7324 // of 8 elements can work here.
7325 return VT == MVT::v8i8 && M.size() == 8;
7326}
7327
7328static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7329 unsigned Index) {
7330 if (Mask.size() == Elements * 2)
7331 return Index / Elements;
7332 return Mask[Index] == 0 ? 0 : 1;
7333}
7334
7335// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7336// checking that pairs of elements in the shuffle mask represent the same index
7337// in each vector, incrementing the expected index by 2 at each step.
7338// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7339// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7340// v2={e,f,g,h}
7341// WhichResult gives the offset for each element in the mask based on which
7342// of the two results it belongs to.
7343//
7344// The transpose can be represented either as:
7345// result1 = shufflevector v1, v2, result1_shuffle_mask
7346// result2 = shufflevector v1, v2, result2_shuffle_mask
7347// where v1/v2 and the shuffle masks have the same number of elements
7348// (here WhichResult (see below) indicates which result is being checked)
7349//
7350// or as:
7351// results = shufflevector v1, v2, shuffle_mask
7352// where both results are returned in one vector and the shuffle mask has twice
7353 // as many elements as v1/v2 (here WhichResult will always be 0 if true); here
7354 // we want to check the low half and high half of the shuffle mask as if it
7355 // were the other case.
7356static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7357 unsigned EltSz = VT.getScalarSizeInBits();
7358 if (EltSz == 64)
7359 return false;
7360
7361 unsigned NumElts = VT.getVectorNumElements();
7362 if (M.size() != NumElts && M.size() != NumElts*2)
7363 return false;
7364
7365 // If the mask is twice as long as the input vector then we need to check the
7366 // upper and lower parts of the mask with a matching value for WhichResult
7367 // FIXME: A mask with only even values will be rejected in case the first
7368 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7369 // M[0] is used to determine WhichResult
7370 for (unsigned i = 0; i < M.size(); i += NumElts) {
7371 WhichResult = SelectPairHalf(NumElts, M, i);
7372 for (unsigned j = 0; j < NumElts; j += 2) {
7373 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7374 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7375 return false;
7376 }
7377 }
7378
7379 if (M.size() == NumElts*2)
7380 WhichResult = 0;
7381
7382 return true;
7383}
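// Worked example (editorial addition, not in the upstream source): for v4i32
// the single-result VTRN masks accepted above are <0,4,2,6> (WhichResult = 0)
// and <1,5,3,7> (WhichResult = 1); the combined two-result form
// <0,4,2,6,1,5,3,7> is also accepted, with WhichResult reset to 0.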
7384
7385/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7386/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7387/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7388static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7389 unsigned EltSz = VT.getScalarSizeInBits();
7390 if (EltSz == 64)
7391 return false;
7392
7393 unsigned NumElts = VT.getVectorNumElements();
7394 if (M.size() != NumElts && M.size() != NumElts*2)
7395 return false;
7396
7397 for (unsigned i = 0; i < M.size(); i += NumElts) {
7398 WhichResult = SelectPairHalf(NumElts, M, i);
7399 for (unsigned j = 0; j < NumElts; j += 2) {
7400 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7401 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7402 return false;
7403 }
7404 }
7405
7406 if (M.size() == NumElts*2)
7407 WhichResult = 0;
7408
7409 return true;
7410}
7411
7412// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7413// that the mask elements are either all even and in steps of size 2 or all odd
7414// and in steps of size 2.
7415// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7416// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7417// v2={e,f,g,h}
7418 // Requires checks similar to those of isVTRNMask with
7419 // respect to how the results are returned.
7420static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7421 unsigned EltSz = VT.getScalarSizeInBits();
7422 if (EltSz == 64)
7423 return false;
7424
7425 unsigned NumElts = VT.getVectorNumElements();
7426 if (M.size() != NumElts && M.size() != NumElts*2)
7427 return false;
7428
7429 for (unsigned i = 0; i < M.size(); i += NumElts) {
7430 WhichResult = SelectPairHalf(NumElts, M, i);
7431 for (unsigned j = 0; j < NumElts; ++j) {
7432 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7433 return false;
7434 }
7435 }
7436
7437 if (M.size() == NumElts*2)
7438 WhichResult = 0;
7439
7440 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7441 if (VT.is64BitVector() && EltSz == 32)
7442 return false;
7443
7444 return true;
7445}
7446
7447/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7448/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7449/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7450static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7451 unsigned EltSz = VT.getScalarSizeInBits();
7452 if (EltSz == 64)
7453 return false;
7454
7455 unsigned NumElts = VT.getVectorNumElements();
7456 if (M.size() != NumElts && M.size() != NumElts*2)
7457 return false;
7458
7459 unsigned Half = NumElts / 2;
7460 for (unsigned i = 0; i < M.size(); i += NumElts) {
7461 WhichResult = SelectPairHalf(NumElts, M, i);
7462 for (unsigned j = 0; j < NumElts; j += Half) {
7463 unsigned Idx = WhichResult;
7464 for (unsigned k = 0; k < Half; ++k) {
7465 int MIdx = M[i + j + k];
7466 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7467 return false;
7468 Idx += 2;
7469 }
7470 }
7471 }
7472
7473 if (M.size() == NumElts*2)
7474 WhichResult = 0;
7475
7476 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7477 if (VT.is64BitVector() && EltSz == 32)
7478 return false;
7479
7480 return true;
7481}
7482
7483// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7484// that pairs of elements of the shufflemask represent the same index in each
7485// vector incrementing sequentially through the vectors.
7486// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7487// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7488// v2={e,f,g,h}
7489 // Requires checks similar to those of isVTRNMask with respect to how the
7490 // results are returned.
7491static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7492 unsigned EltSz = VT.getScalarSizeInBits();
7493 if (EltSz == 64)
7494 return false;
7495
7496 unsigned NumElts = VT.getVectorNumElements();
7497 if (M.size() != NumElts && M.size() != NumElts*2)
7498 return false;
7499
7500 for (unsigned i = 0; i < M.size(); i += NumElts) {
7501 WhichResult = SelectPairHalf(NumElts, M, i);
7502 unsigned Idx = WhichResult * NumElts / 2;
7503 for (unsigned j = 0; j < NumElts; j += 2) {
7504 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7505 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7506 return false;
7507 Idx += 1;
7508 }
7509 }
7510
7511 if (M.size() == NumElts*2)
7512 WhichResult = 0;
7513
7514 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7515 if (VT.is64BitVector() && EltSz == 32)
7516 return false;
7517
7518 return true;
7519}
7520
7521/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7522/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7523/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7524static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7525 unsigned EltSz = VT.getScalarSizeInBits();
7526 if (EltSz == 64)
7527 return false;
7528
7529 unsigned NumElts = VT.getVectorNumElements();
7530 if (M.size() != NumElts && M.size() != NumElts*2)
7531 return false;
7532
7533 for (unsigned i = 0; i < M.size(); i += NumElts) {
7534 WhichResult = SelectPairHalf(NumElts, M, i);
7535 unsigned Idx = WhichResult * NumElts / 2;
7536 for (unsigned j = 0; j < NumElts; j += 2) {
7537 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7538 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7539 return false;
7540 Idx += 1;
7541 }
7542 }
7543
7544 if (M.size() == NumElts*2)
7545 WhichResult = 0;
7546
7547 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7548 if (VT.is64BitVector() && EltSz == 32)
7549 return false;
7550
7551 return true;
7552}
7553
7554/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7555/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7556static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7557 unsigned &WhichResult,
7558 bool &isV_UNDEF) {
7559 isV_UNDEF = false;
7560 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7561 return ARMISD::VTRN;
7562 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7563 return ARMISD::VUZP;
7564 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7565 return ARMISD::VZIP;
7566
7567 isV_UNDEF = true;
7568 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7569 return ARMISD::VTRN;
7570 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7571 return ARMISD::VUZP;
7572 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7573 return ARMISD::VZIP;
7574
7575 return 0;
7576}
7577
7578/// \return true if this is a reverse operation on a vector.
7579static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7580 unsigned NumElts = VT.getVectorNumElements();
7581 // Make sure the mask has the right size.
7582 if (NumElts != M.size())
7583 return false;
7584
7585 // Look for <15, ..., 3, -1, 1, 0>.
7586 for (unsigned i = 0; i != NumElts; ++i)
7587 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7588 return false;
7589
7590 return true;
7591}
7592
7593static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7594 unsigned NumElts = VT.getVectorNumElements();
7595 // Make sure the mask has the right size.
7596 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7597 return false;
7598
7599 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7600 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7601 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7602 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7603 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7604 int Ofs = Top ? 1 : 0;
7605 int Upper = SingleSource ? 0 : NumElts;
7606 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7607 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7608 return false;
7609 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7610 return false;
7611 }
7612 return true;
7613}
7614
7615static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7616 unsigned NumElts = VT.getVectorNumElements();
7617 // Make sure the mask has the right size.
7618 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7619 return false;
7620
7621 // If Top
7622 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7623 // This inserts Input2 into Input1
7624 // else if not Top
7625 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7626 // This inserts Input1 into Input2
7627 unsigned Offset = Top ? 0 : 1;
7628 unsigned N = SingleSource ? 0 : NumElts;
7629 for (unsigned i = 0; i < NumElts; i += 2) {
7630 if (M[i] >= 0 && M[i] != (int)i)
7631 return false;
7632 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7633 return false;
7634 }
7635
7636 return true;
7637}
7638
7639static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7640 unsigned NumElts = ToVT.getVectorNumElements();
7641 if (NumElts != M.size())
7642 return false;
7643
7644 // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7645 // looking for patterns of:
7646 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7647 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7648
7649 unsigned Off0 = rev ? NumElts / 2 : 0;
7650 unsigned Off1 = rev ? 0 : NumElts / 2;
7651 for (unsigned i = 0; i < NumElts; i += 2) {
7652 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7653 return false;
7654 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7655 return false;
7656 }
7657
7658 return true;
7659}
7660
7661// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7662// from a pair of inputs. For example:
7663// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7664// FP_ROUND(EXTRACT_ELT(Y, 0),
7665// FP_ROUND(EXTRACT_ELT(X, 1),
7666// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7667static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7668 const ARMSubtarget *ST) {
7669 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7670 if (!ST->hasMVEFloatOps())
7671 return SDValue();
7672
7673 SDLoc dl(BV);
7674 EVT VT = BV.getValueType();
7675 if (VT != MVT::v8f16)
7676 return SDValue();
7677
7678 // We are looking for a buildvector of fptrunc elements, where all the
7679 // elements are extracted alternately from two sources. Check that the first
7680 // two items are valid enough and extract some info from them (they are checked
7681 // properly in the loop below).
7682 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7683 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7684 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7685 return SDValue();
7686 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7687 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7688 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7689 return SDValue();
7690 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7691 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7692 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7693 return SDValue();
7694
7695 // Check all the values in the BuildVector line up with our expectations.
7696 for (unsigned i = 1; i < 4; i++) {
7697 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7698 return Trunc.getOpcode() == ISD::FP_ROUND &&
7699 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7700 Trunc.getOperand(0).getOperand(0) == Op &&
7701 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7702 };
7703 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7704 return SDValue();
7705 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7706 return SDValue();
7707 }
7708
7709 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7710 DAG.getConstant(0, dl, MVT::i32));
7711 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7712 DAG.getConstant(1, dl, MVT::i32));
7713}
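// Illustrative sketch (editorial addition, not in the upstream source): the net
// effect is that a v8f16 build vector of fptruncs of X[0],Y[0],X[1],Y[1],...
// (X and Y being v4f32) becomes two MVE VCVTN nodes: the first narrows X into
// the even lanes of an undef vector, the second narrows Y into the odd lanes.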
7714
7715// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7716// from a single input on alternating lanes. For example:
7717// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7718// FP_ROUND(EXTRACT_ELT(X, 2),
7719// FP_ROUND(EXTRACT_ELT(X, 4), ...)
7720static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7721 const ARMSubtarget *ST) {
7722 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7723 if (!ST->hasMVEFloatOps())
7724 return SDValue();
7725
7726 SDLoc dl(BV);
7727 EVT VT = BV.getValueType();
7728 if (VT != MVT::v4f32)
7729 return SDValue();
7730
7731 // We are looking for a buildvector of fpext elements, where all the
7732 // elements are alternating lanes from a single source. For example <0,2,4,6>
7733 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7734 // info from them (they are checked properly in the loop below).
7735 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7736 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7737 return SDValue();
7738 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7739 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7740 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7741 return SDValue();
7742
7743 // Check all the values in the BuildVector line up with our expectations.
7744 for (unsigned i = 1; i < 4; i++) {
7745 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7746 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7747 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7748 Trunc.getOperand(0).getOperand(0) == Op &&
7749 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7750 };
7751 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7752 return SDValue();
7753 }
7754
7755 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7756 DAG.getConstant(Offset, dl, MVT::i32));
7757}
7758
7759// If N is an integer constant that can be moved into a register in one
7760// instruction, return an SDValue of such a constant (will become a MOV
7761// instruction). Otherwise return null.
7762static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7763 const ARMSubtarget *ST, const SDLoc &dl) {
7764 uint64_t Val;
7765 if (!isa<ConstantSDNode>(N))
7766 return SDValue();
7767 Val = N->getAsZExtVal();
7768
7769 if (ST->isThumb1Only()) {
7770 if (Val <= 255 || ~Val <= 255)
7771 return DAG.getConstant(Val, dl, MVT::i32);
7772 } else {
7773 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7774 return DAG.getConstant(Val, dl, MVT::i32);
7775 }
7776 return SDValue();
7777}
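// Illustrative note (editorial addition, not in the upstream source): on Thumb1
// this accepts small constants such as 200 (a single MOVS); on ARM/Thumb2 it
// accepts any value that ARM_AM::getSOImmVal can encode directly or via its
// complement, e.g. the rotated immediate 0x00ff0000.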
7778
7779static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7780 const ARMSubtarget *ST) {
7781 SDLoc dl(Op);
7782 EVT VT = Op.getValueType();
7783
7784 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7785
7786 unsigned NumElts = VT.getVectorNumElements();
7787 unsigned BoolMask;
7788 unsigned BitsPerBool;
7789 if (NumElts == 2) {
7790 BitsPerBool = 8;
7791 BoolMask = 0xff;
7792 } else if (NumElts == 4) {
7793 BitsPerBool = 4;
7794 BoolMask = 0xf;
7795 } else if (NumElts == 8) {
7796 BitsPerBool = 2;
7797 BoolMask = 0x3;
7798 } else if (NumElts == 16) {
7799 BitsPerBool = 1;
7800 BoolMask = 0x1;
7801 } else
7802 return SDValue();
7803
7804 // If this is a single value copied into all lanes (a splat), we can just sign
7805 // extend that single value
7806 SDValue FirstOp = Op.getOperand(0);
7807 if (!isa<ConstantSDNode>(FirstOp) &&
7808 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7809 return U.get().isUndef() || U.get() == FirstOp;
7810 })) {
7811 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7812 DAG.getValueType(MVT::i1));
7813 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7814 }
7815
7816 // First create base with bits set where known
7817 unsigned Bits32 = 0;
7818 for (unsigned i = 0; i < NumElts; ++i) {
7819 SDValue V = Op.getOperand(i);
7820 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7821 continue;
7822 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7823 if (BitSet)
7824 Bits32 |= BoolMask << (i * BitsPerBool);
7825 }
7826
7827 // Add in unknown nodes
7828 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7829 DAG.getConstant(Bits32, dl, MVT::i32));
7830 for (unsigned i = 0; i < NumElts; ++i) {
7831 SDValue V = Op.getOperand(i);
7832 if (isa<ConstantSDNode>(V) || V.isUndef())
7833 continue;
7834 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7835 DAG.getConstant(i, dl, MVT::i32));
7836 }
7837
7838 return Base;
7839}
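// Worked example (editorial addition, not in the upstream source): for a
// constant v4i1 build vector <1,0,1,1> each lane owns 4 bits of the predicate,
// so Bits32 = 0x000f | 0x0f00 | 0xf000 = 0xff0f; any non-constant lanes are
// then patched in afterwards with INSERT_VECTOR_ELT.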
7840
7841static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7842 const ARMSubtarget *ST) {
7843 if (!ST->hasMVEIntegerOps())
7844 return SDValue();
7845
7846 // We are looking for a buildvector where each element is Op[0] + i*N
7847 EVT VT = Op.getValueType();
7848 SDValue Op0 = Op.getOperand(0);
7849 unsigned NumElts = VT.getVectorNumElements();
7850
7851 // Get the increment value from operand 1
7852 SDValue Op1 = Op.getOperand(1);
7853 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7854 !isa<ConstantSDNode>(Op1.getOperand(1)))
7855 return SDValue();
7856 unsigned N = Op1.getConstantOperandVal(1);
7857 if (N != 1 && N != 2 && N != 4 && N != 8)
7858 return SDValue();
7859
7860 // Check that each other operand matches
7861 for (unsigned I = 2; I < NumElts; I++) {
7862 SDValue OpI = Op.getOperand(I);
7863 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7864 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7865 OpI.getConstantOperandVal(1) != I * N)
7866 return SDValue();
7867 }
7868
7869 SDLoc DL(Op);
7870 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7871 DAG.getConstant(N, DL, MVT::i32));
7872}
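// Illustrative note (editorial addition, not in the upstream source): this
// matches build vectors of the form (x, x+4, x+8, x+12), i.e. a base plus a
// constant 1/2/4/8 step per lane, and emits a single VIDUP with that step
// instead of a chain of scalar inserts.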
7873
7874 // Returns true if the operation N can be treated as a qr instruction variant at
7875// operand Op.
7876static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7877 switch (N->getOpcode()) {
7878 case ISD::ADD:
7879 case ISD::MUL:
7880 case ISD::SADDSAT:
7881 case ISD::UADDSAT:
7882 return true;
7883 case ISD::SUB:
7884 case ISD::SSUBSAT:
7885 case ISD::USUBSAT:
7886 return N->getOperand(1).getNode() == Op;
7887 case ISD::INTRINSIC_WO_CHAIN:
7888 switch (N->getConstantOperandVal(0)) {
7889 case Intrinsic::arm_mve_add_predicated:
7890 case Intrinsic::arm_mve_mul_predicated:
7891 case Intrinsic::arm_mve_qadd_predicated:
7892 case Intrinsic::arm_mve_vhadd:
7893 case Intrinsic::arm_mve_hadd_predicated:
7894 case Intrinsic::arm_mve_vqdmulh:
7895 case Intrinsic::arm_mve_qdmulh_predicated:
7896 case Intrinsic::arm_mve_vqrdmulh:
7897 case Intrinsic::arm_mve_qrdmulh_predicated:
7898 case Intrinsic::arm_mve_vqdmull:
7899 case Intrinsic::arm_mve_vqdmull_predicated:
7900 return true;
7901 case Intrinsic::arm_mve_sub_predicated:
7902 case Intrinsic::arm_mve_qsub_predicated:
7903 case Intrinsic::arm_mve_vhsub:
7904 case Intrinsic::arm_mve_hsub_predicated:
7905 return N->getOperand(2).getNode() == Op;
7906 default:
7907 return false;
7908 }
7909 default:
7910 return false;
7911 }
7912}
7913
7914// If this is a case we can't handle, return null and let the default
7915// expansion code take care of it.
7916SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7917 const ARMSubtarget *ST) const {
7918 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7919 SDLoc dl(Op);
7920 EVT VT = Op.getValueType();
7921
7922 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7923 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7924
7925 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7926 return R;
7927
7928 APInt SplatBits, SplatUndef;
7929 unsigned SplatBitSize;
7930 bool HasAnyUndefs;
7931 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7932 if (SplatUndef.isAllOnes())
7933 return DAG.getUNDEF(VT);
7934
7935 // If all the users of this constant splat are qr instruction variants,
7936 // generate a vdup of the constant.
7937 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7938 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7939 all_of(BVN->uses(),
7940 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7941 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7942 : SplatBitSize == 16 ? MVT::v8i16
7943 : MVT::v16i8;
7944 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7945 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7946 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7947 }
7948
7949 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7950 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7951 // Check if an immediate VMOV works.
7952 EVT VmovVT;
7953 SDValue Val =
7954 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7955 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7956
7957 if (Val.getNode()) {
7958 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7959 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7960 }
7961
7962 // Try an immediate VMVN.
7963 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7964 Val = isVMOVModifiedImm(
7965 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7966 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7967 if (Val.getNode()) {
7968 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7969 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7970 }
7971
7972 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7973 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7974 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7975 if (ImmVal != -1) {
7976 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7977 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7978 }
7979 }
7980
7981 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7982 // type.
7983 if (ST->hasMVEIntegerOps() &&
7984 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7985 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7986 : SplatBitSize == 16 ? MVT::v8i16
7987 : MVT::v16i8;
7988 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7989 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7990 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7991 }
7992 }
7993 }
7994
7995 // Scan through the operands to see if only one value is used.
7996 //
7997 // As an optimisation, even if more than one value is used it may be more
7998 // profitable to splat with one value and then change some lanes.
7999 //
8000 // Heuristically we decide to do this if the vector has a "dominant" value,
8001 // defined as splatted to more than half of the lanes.
8002 unsigned NumElts = VT.getVectorNumElements();
8003 bool isOnlyLowElement = true;
8004 bool usesOnlyOneValue = true;
8005 bool hasDominantValue = false;
8006 bool isConstant = true;
8007
8008 // Map of the number of times a particular SDValue appears in the
8009 // element list.
8010 DenseMap<SDValue, unsigned> ValueCounts;
8011 SDValue Value;
8012 for (unsigned i = 0; i < NumElts; ++i) {
8013 SDValue V = Op.getOperand(i);
8014 if (V.isUndef())
8015 continue;
8016 if (i > 0)
8017 isOnlyLowElement = false;
8018 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
8019 isConstant = false;
8020
8021 ValueCounts.insert(std::make_pair(V, 0));
8022 unsigned &Count = ValueCounts[V];
8023
8024 // Is this value dominant? (takes up more than half of the lanes)
8025 if (++Count > (NumElts / 2)) {
8026 hasDominantValue = true;
8027 Value = V;
8028 }
8029 }
8030 if (ValueCounts.size() != 1)
8031 usesOnlyOneValue = false;
8032 if (!Value.getNode() && !ValueCounts.empty())
8033 Value = ValueCounts.begin()->first;
8034
8035 if (ValueCounts.empty())
8036 return DAG.getUNDEF(VT);
8037
8038 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8039 // Keep going if we are hitting this case.
8040 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8041 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8042
8043 unsigned EltSize = VT.getScalarSizeInBits();
8044
8045 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8046 // i32 and try again.
8047 if (hasDominantValue && EltSize <= 32) {
8048 if (!isConstant) {
8049 SDValue N;
8050
8051 // If we are VDUPing a value that comes directly from a vector, that will
8052 // cause an unnecessary move to and from a GPR, where instead we could
8053 // just use VDUPLANE. We can only do this if the lane being extracted
8054 // is at a constant index, as the VDUP from lane instructions only have
8055 // constant-index forms.
8056 ConstantSDNode *constIndex;
8057 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8058 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8059 // We need to create a new undef vector to use for the VDUPLANE if the
8060 // size of the vector from which we get the value is different than the
8061 // size of the vector that we need to create. We will insert the element
8062 // such that the register coalescer will remove unnecessary copies.
8063 if (VT != Value->getOperand(0).getValueType()) {
8064 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8065 VT.getVectorNumElements();
8066 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8067 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8068 Value, DAG.getConstant(index, dl, MVT::i32)),
8069 DAG.getConstant(index, dl, MVT::i32));
8070 } else
8071 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8072 Value->getOperand(0), Value->getOperand(1));
8073 } else
8074 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8075
8076 if (!usesOnlyOneValue) {
8077 // The dominant value was splatted as 'N', but we now have to insert
8078 // all differing elements.
8079 for (unsigned I = 0; I < NumElts; ++I) {
8080 if (Op.getOperand(I) == Value)
8081 continue;
8082 SmallVector<SDValue, 3> Ops;
8083 Ops.push_back(N);
8084 Ops.push_back(Op.getOperand(I));
8085 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8086 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8087 }
8088 }
8089 return N;
8090 }
8091 if (VT.getVectorElementType().isFloatingPoint()) {
8092 SmallVector<SDValue, 8> Ops;
8093 MVT FVT = VT.getVectorElementType().getSimpleVT();
8094 assert(FVT == MVT::f32 || FVT == MVT::f16);
8095 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8096 for (unsigned i = 0; i < NumElts; ++i)
8097 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8098 Op.getOperand(i)));
8099 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8100 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8101 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8102 if (Val.getNode())
8103 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8104 }
8105 if (usesOnlyOneValue) {
8106 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8107 if (isConstant && Val.getNode())
8108 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8109 }
8110 }
8111
8112 // If all elements are constants and the case above didn't get hit, fall back
8113 // to the default expansion, which will generate a load from the constant
8114 // pool.
8115 if (isConstant)
8116 return SDValue();
8117
8118 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8119 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8120 // length <= 2.
8121 if (NumElts >= 4)
8122 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8123 return shuffle;
8124
8125 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8126 // VCVT's
8127 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8128 return VCVT;
8129 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8130 return VCVT;
8131
8132 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8133 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8134 // into two 64-bit vectors; we might discover a better way to lower it.
8135 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8136 EVT ExtVT = VT.getVectorElementType();
8137 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8138 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8139 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8140 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8141 SDValue Upper =
8142 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8143 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8144 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8145 if (Lower && Upper)
8146 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8147 }
8148
8149 // Vectors with 32- or 64-bit elements can be built by directly assigning
8150 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8151 // will be legalized.
8152 if (EltSize >= 32) {
8153 // Do the expansion with floating-point types, since that is what the VFP
8154 // registers are defined to use, and since i64 is not legal.
8155 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8156 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8157 SmallVector<SDValue, 8> Ops;
8158 for (unsigned i = 0; i < NumElts; ++i)
8159 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8160 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8161 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8162 }
8163
8164 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8165 // know the default expansion would otherwise fall back on something even
8166 // worse. For a vector with one or two non-undef values, that's
8167 // scalar_to_vector for the elements followed by a shuffle (provided the
8168 // shuffle is valid for the target) and materialization element by element
8169 // on the stack followed by a load for everything else.
8170 if (!isConstant && !usesOnlyOneValue) {
8171 SDValue Vec = DAG.getUNDEF(VT);
8172 for (unsigned i = 0 ; i < NumElts; ++i) {
8173 SDValue V = Op.getOperand(i);
8174 if (V.isUndef())
8175 continue;
8176 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8177 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8178 }
8179 return Vec;
8180 }
8181
8182 return SDValue();
8183}
8184
8185// Gather data to see if the operation can be modelled as a
8186// shuffle in combination with VEXTs.
8187SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8188 SelectionDAG &DAG) const {
8189 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8190 SDLoc dl(Op);
8191 EVT VT = Op.getValueType();
8192 unsigned NumElts = VT.getVectorNumElements();
8193
8194 struct ShuffleSourceInfo {
8195 SDValue Vec;
8196 unsigned MinElt = std::numeric_limits<unsigned>::max();
8197 unsigned MaxElt = 0;
8198
8199 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8200 // be compatible with the shuffle we intend to construct. As a result
8201 // ShuffleVec will be some sliding window into the original Vec.
8202 SDValue ShuffleVec;
8203
8204 // Code should guarantee that element i in Vec starts at element "WindowBase
8205 // + i * WindowScale in ShuffleVec".
8206 int WindowBase = 0;
8207 int WindowScale = 1;
8208
8209 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8210
8211 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8212 };
8213
8214 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8215 // node.
8216 SmallVector<ShuffleSourceInfo, 2> Sources;
8217 for (unsigned i = 0; i < NumElts; ++i) {
8218 SDValue V = Op.getOperand(i);
8219 if (V.isUndef())
8220 continue;
8221 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8222 // A shuffle can only come from building a vector from various
8223 // elements of other vectors.
8224 return SDValue();
8225 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8226 // Furthermore, shuffles require a constant mask, whereas extractelts
8227 // accept variable indices.
8228 return SDValue();
8229 }
8230
8231 // Add this element source to the list if it's not already there.
8232 SDValue SourceVec = V.getOperand(0);
8233 auto Source = llvm::find(Sources, SourceVec);
8234 if (Source == Sources.end())
8235 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8236
8237 // Update the minimum and maximum lane number seen.
8238 unsigned EltNo = V.getConstantOperandVal(1);
8239 Source->MinElt = std::min(Source->MinElt, EltNo);
8240 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8241 }
8242
8243 // Currently only do something sane when at most two source vectors
8244 // are involved.
8245 if (Sources.size() > 2)
8246 return SDValue();
8247
8248 // Find out the smallest element size among result and two sources, and use
8249 // it as element size to build the shuffle_vector.
8250 EVT SmallestEltTy = VT.getVectorElementType();
8251 for (auto &Source : Sources) {
8252 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8253 if (SrcEltTy.bitsLT(SmallestEltTy))
8254 SmallestEltTy = SrcEltTy;
8255 }
8256 unsigned ResMultiplier =
8257 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8258 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8259 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8260
8261 // If the source vector is too wide or too narrow, we may nevertheless be able
8262 // to construct a compatible shuffle either by concatenating it with UNDEF or
8263 // extracting a suitable range of elements.
8264 for (auto &Src : Sources) {
8265 EVT SrcVT = Src.ShuffleVec.getValueType();
8266
8267 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8268 uint64_t VTSize = VT.getFixedSizeInBits();
8269 if (SrcVTSize == VTSize)
8270 continue;
8271
8272 // This stage of the search produces a source with the same element type as
8273 // the original, but with a total width matching the BUILD_VECTOR output.
8274 EVT EltVT = SrcVT.getVectorElementType();
8275 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8276 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8277
8278 if (SrcVTSize < VTSize) {
8279 if (2 * SrcVTSize != VTSize)
8280 return SDValue();
8281 // We can pad out the smaller vector for free, so if it's part of a
8282 // shuffle...
8283 Src.ShuffleVec =
8284 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8285 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8286 continue;
8287 }
8288
8289 if (SrcVTSize != 2 * VTSize)
8290 return SDValue();
8291
8292 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8293 // Span too large for a VEXT to cope
8294 return SDValue();
8295 }
8296
8297 if (Src.MinElt >= NumSrcElts) {
8298 // The extraction can just take the second half
8299 Src.ShuffleVec =
8300 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8301 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8302 Src.WindowBase = -NumSrcElts;
8303 } else if (Src.MaxElt < NumSrcElts) {
8304 // The extraction can just take the first half
8305 Src.ShuffleVec =
8306 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8307 DAG.getConstant(0, dl, MVT::i32));
8308 } else {
8309 // An actual VEXT is needed
8310 SDValue VEXTSrc1 =
8311 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8312 DAG.getConstant(0, dl, MVT::i32));
8313 SDValue VEXTSrc2 =
8314 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8315 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8316
8317 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8318 VEXTSrc2,
8319 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8320 Src.WindowBase = -Src.MinElt;
8321 }
8322 }
8323
8324 // Another possible incompatibility occurs from the vector element types. We
8325 // can fix this by bitcasting the source vectors to the same type we intend
8326 // for the shuffle.
8327 for (auto &Src : Sources) {
8328 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8329 if (SrcEltTy == SmallestEltTy)
8330 continue;
8331 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8332 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8333 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8334 Src.WindowBase *= Src.WindowScale;
8335 }
8336
8337 // Final check before we try to actually produce a shuffle.
8338 LLVM_DEBUG(for (auto Src
8339 : Sources)
8340 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8341
8342 // The stars all align; our next step is to produce the mask for the shuffle.
8343 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8344 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8345 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8346 SDValue Entry = Op.getOperand(i);
8347 if (Entry.isUndef())
8348 continue;
8349
8350 auto Src = llvm::find(Sources, Entry.getOperand(0));
8351 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8352
8353 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8354 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8355 // segment.
8356 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8357 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8358 VT.getScalarSizeInBits());
8359 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8360
8361 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8362 // starting at the appropriate offset.
8363 int *LaneMask = &Mask[i * ResMultiplier];
8364
8365 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8366 ExtractBase += NumElts * (Src - Sources.begin());
8367 for (int j = 0; j < LanesDefined; ++j)
8368 LaneMask[j] = ExtractBase + j;
8369 }
8370
8371
8372 // We can't handle more than two sources. This should have already
8373 // been checked before this point.
8374 assert(Sources.size() <= 2 && "Too many sources!");
8375
8376 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8377 for (unsigned i = 0; i < Sources.size(); ++i)
8378 ShuffleOps[i] = Sources[i].ShuffleVec;
8379
8380 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8381 ShuffleOps[1], Mask, DAG);
8382 if (!Shuffle)
8383 return SDValue();
8384 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8385}
8386
8387enum ShuffleOpCodes {
8388 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8389 OP_VREV,
8390 OP_VDUP0,
8391 OP_VDUP1,
8392 OP_VDUP2,
8393 OP_VDUP3,
8394 OP_VEXT1,
8395 OP_VEXT2,
8396 OP_VEXT3,
8397 OP_VUZPL, // VUZP, left result
8398 OP_VUZPR, // VUZP, right result
8399 OP_VZIPL, // VZIP, left result
8400 OP_VZIPR, // VZIP, right result
8401 OP_VTRNL, // VTRN, left result
8402 OP_VTRNR // VTRN, right result
8403};
8404
8405static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8406 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8407 switch (OpNum) {
8408 case OP_COPY:
8409 case OP_VREV:
8410 case OP_VDUP0:
8411 case OP_VDUP1:
8412 case OP_VDUP2:
8413 case OP_VDUP3:
8414 return true;
8415 }
8416 return false;
8417}
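// A PFEntry packs the whole recipe for a 4-element shuffle: the cost lives in
// bits [31:30], the opcode in bits [29:26], and two 13-bit operand indices
// (LHSID in [25:13], RHSID in [12:0]) that refer back into the perfect shuffle
// table, as decoded in GeneratePerfectShuffle below.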
8418
8419/// isShuffleMaskLegal - Targets can use this to indicate that they only
8420/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8421/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8422/// are assumed to be legal.
8423bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8424 if (VT.getVectorNumElements() == 4 &&
8425 (VT.is128BitVector() || VT.is64BitVector())) {
8426 unsigned PFIndexes[4];
8427 for (unsigned i = 0; i != 4; ++i) {
8428 if (M[i] < 0)
8429 PFIndexes[i] = 8;
8430 else
8431 PFIndexes[i] = M[i];
8432 }
8433
8434 // Compute the index in the perfect shuffle table.
8435 unsigned PFTableIndex =
8436 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
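// For example, the mask <0,2,4,6> (with no undef lanes) gives
// 0*729 + 2*81 + 4*9 + 6 = 204 as its index into the 9*9*9*9 entry table,
// with the value 8 reserved for undef lanes.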
8437 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8438 unsigned Cost = (PFEntry >> 30);
8439
8440 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8441 return true;
8442 }
8443
8444 bool ReverseVEXT, isV_UNDEF;
8445 unsigned Imm, WhichResult;
8446
8447 unsigned EltSize = VT.getScalarSizeInBits();
8448 if (EltSize >= 32 ||
8450 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8451 isVREVMask(M, VT, 64) ||
8452 isVREVMask(M, VT, 32) ||
8453 isVREVMask(M, VT, 16))
8454 return true;
8455 else if (Subtarget->hasNEON() &&
8456 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8457 isVTBLMask(M, VT) ||
8458 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8459 return true;
8460 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8461 isReverseMask(M, VT))
8462 return true;
8463 else if (Subtarget->hasMVEIntegerOps() &&
8464 (isVMOVNMask(M, VT, true, false) ||
8465 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8466 return true;
8467 else if (Subtarget->hasMVEIntegerOps() &&
8468 (isTruncMask(M, VT, false, false) ||
8469 isTruncMask(M, VT, false, true) ||
8470 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8471 return true;
8472 else
8473 return false;
8474}
8475
8476/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8477/// the specified operations to build the shuffle.
8478static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8479 SDValue RHS, SelectionDAG &DAG,
8480 const SDLoc &dl) {
8481 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8482 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8483 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8484
8485 if (OpNum == OP_COPY) {
8486 if (LHSID == (1*9+2)*9+3) return LHS;
8487 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8488 return RHS;
8489 }
8490
8491 SDValue OpLHS, OpRHS;
8492 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8493 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8494 EVT VT = OpLHS.getValueType();
8495
8496 switch (OpNum) {
8497 default: llvm_unreachable("Unknown shuffle opcode!");
8498 case OP_VREV:
8499 // VREV divides the vector in half and swaps within the half.
8500 if (VT.getScalarSizeInBits() == 32)
8501 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8502 // vrev <4 x i16> -> VREV32
8503 if (VT.getScalarSizeInBits() == 16)
8504 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8505 // vrev <4 x i8> -> VREV16
8506 assert(VT.getScalarSizeInBits() == 8);
8507 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8508 case OP_VDUP0:
8509 case OP_VDUP1:
8510 case OP_VDUP2:
8511 case OP_VDUP3:
8512 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8513 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8514 case OP_VEXT1:
8515 case OP_VEXT2:
8516 case OP_VEXT3:
8517 return DAG.getNode(ARMISD::VEXT, dl, VT,
8518 OpLHS, OpRHS,
8519 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8520 case OP_VUZPL:
8521 case OP_VUZPR:
8522 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8523 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8524 case OP_VZIPL:
8525 case OP_VZIPR:
8526 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8527 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8528 case OP_VTRNL:
8529 case OP_VTRNR:
8530 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8531 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8532 }
8533}
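// The recursion above bottoms out at OP_COPY entries: an LHSID of
// (1*9+2)*9+3 == 102 encodes the identity mask <0,1,2,3> (return LHS), while
// ((4*9+5)*9+6)*9+7 encodes <4,5,6,7> (return RHS).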
8534
8535static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8536 ArrayRef<int> ShuffleMask,
8537 SelectionDAG &DAG) {
8538 // Check to see if we can use the VTBL instruction.
8539 SDValue V1 = Op.getOperand(0);
8540 SDValue V2 = Op.getOperand(1);
8541 SDLoc DL(Op);
8542
8543 SmallVector<SDValue, 8> VTBLMask;
8544 for (int I : ShuffleMask)
8545 VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
8546
8547 if (V2.getNode()->isUndef())
8548 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8549 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8550
8551 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8552 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8553}
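// For example, the v8i8 mask <0,8,1,9,2,10,3,11> is emitted above verbatim as
// the VTBL index vector: indices 0-7 select bytes from V1 and indices 8-15
// select bytes from V2 once both registers form the VTBL2 table.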
8554
8555static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8556 SDLoc DL(Op);
8557 EVT VT = Op.getValueType();
8558
8559 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8560 "Expect an v8i16/v16i8 type");
8561 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8562 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8563 // extract the first 8 bytes into the top double word and the last 8 bytes
8564 // into the bottom double word, through a new vector shuffle that will be
8565 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8566 std::vector<int> NewMask;
8567 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8568 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8569 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8570 NewMask.push_back(i);
8571 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8572}
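// For v16i8 the net effect is a full reversal: VREV64 gives
// <7,...,0,15,...,8> and the follow-up mask <8..15,0..7> swaps the two double
// words, producing <15,...,0>.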
8573
8574static EVT getVectorTyFromPredicateVector(EVT VT) {
8575 switch (VT.getSimpleVT().SimpleTy) {
8576 case MVT::v2i1:
8577 return MVT::v2f64;
8578 case MVT::v4i1:
8579 return MVT::v4i32;
8580 case MVT::v8i1:
8581 return MVT::v8i16;
8582 case MVT::v16i1:
8583 return MVT::v16i8;
8584 default:
8585 llvm_unreachable("Unexpected vector predicate type");
8586 }
8587}
8588
8589static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8590 SelectionDAG &DAG) {
8591 // Converting from boolean predicates to integers involves creating a vector
8592 // of all ones or all zeroes and selecting the lanes based upon the real
8593 // predicate.
8594 SDValue AllOnes =
8595 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8596 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8597
8598 SDValue AllZeroes =
8599 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8600 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8601
8602 // Get full vector type from predicate type
8603 EVT NewVT = getVectorTyFromPredicateVector(VT);
8604
8605 SDValue RecastV1;
8606 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8607 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8608 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8609 // since we know in hardware the sizes are really the same.
8610 if (VT != MVT::v16i1)
8611 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8612 else
8613 RecastV1 = Pred;
8614
8615 // Select either all ones or zeroes depending upon the real predicate bits.
8616 SDValue PredAsVector =
8617 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8618
8619 // Recast our new predicate-as-integer v16i8 vector into something
8620 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8621 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8622}
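// For example, a v4i1 predicate is recast to v16i1 (each original lane owns 4
// predicate bits), the VSELECT then yields 0xff or 0x00 bytes in a v16i8, and
// the final bitcast to v4i32 leaves each original lane as an all-ones or
// all-zeroes i32.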
8623
8624static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8625 const ARMSubtarget *ST) {
8626 EVT VT = Op.getValueType();
8627 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8628 ArrayRef<int> ShuffleMask = SVN->getMask();
8629
8630 assert(ST->hasMVEIntegerOps() &&
8631 "No support for vector shuffle of boolean predicates");
8632
8633 SDValue V1 = Op.getOperand(0);
8634 SDValue V2 = Op.getOperand(1);
8635 SDLoc dl(Op);
8636 if (isReverseMask(ShuffleMask, VT)) {
8637 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8638 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8639 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8640 DAG.getConstant(16, dl, MVT::i32));
8641 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8642 }
8643
8644 // Until we can come up with optimised cases for every single vector
8645 // shuffle in existence we have chosen the least painful strategy. This is
8646 // to essentially promote the boolean predicate to an 8-bit integer, where
8647 // each predicate represents a byte. Then we fall back on a normal integer
8648 // vector shuffle and convert the result back into a predicate vector. In
8649 // many cases the generated code might be even better than scalar code
8650 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8651 // fields in a register into 8 other arbitrary 2-bit fields!
8652 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8653 EVT NewVT = PredAsVector1.getValueType();
8654 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8655 : PromoteMVEPredVector(dl, V2, VT, DAG);
8656 assert(PredAsVector2.getValueType() == NewVT &&
8657 "Expected identical vector type in expanded i1 shuffle!");
8658
8659 // Do the shuffle!
8660 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8661 PredAsVector2, ShuffleMask);
8662
8663 // Now return the result of comparing the shuffled vector with zero,
8664 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8665 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8666 if (VT == MVT::v2i1) {
8667 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8668 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8669 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8670 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8671 }
8672 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8673 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8674}
8675
8676static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8677 ArrayRef<int> ShuffleMask,
8678 SelectionDAG &DAG) {
8679 // Attempt to lower the vector shuffle using as many whole register movs as
8680 // possible. This is useful for types smaller than 32 bits, which would
8681 // often otherwise become a series of GPR movs.
8682 SDLoc dl(Op);
8683 EVT VT = Op.getValueType();
8684 if (VT.getScalarSizeInBits() >= 32)
8685 return SDValue();
8686
8687 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8688 "Unexpected vector type");
8689 int NumElts = VT.getVectorNumElements();
8690 int QuarterSize = NumElts / 4;
8691 // The four final parts of the vector, as i32's
8692 SDValue Parts[4];
8693
8694 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8695 // <u,u,u,u>), returning the vmov lane index
8696 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8697 // Detect which mov lane this would be from the first non-undef element.
8698 int MovIdx = -1;
8699 for (int i = 0; i < Length; i++) {
8700 if (ShuffleMask[Start + i] >= 0) {
8701 if (ShuffleMask[Start + i] % Length != i)
8702 return -1;
8703 MovIdx = ShuffleMask[Start + i] / Length;
8704 break;
8705 }
8706 }
8707 // If all items are undef, leave this for other combines
8708 if (MovIdx == -1)
8709 return -1;
8710 // Check the remaining values are the correct part of the same mov
8711 for (int i = 1; i < Length; i++) {
8712 if (ShuffleMask[Start + i] >= 0 &&
8713 (ShuffleMask[Start + i] / Length != MovIdx ||
8714 ShuffleMask[Start + i] % Length != i))
8715 return -1;
8716 }
8717 return MovIdx;
8718 };
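// For example, for a v8i16 shuffle (QuarterSize == 2) the mask fragment <2,3>
// in a part gives MovIdx 1: the whole part is just lane 1 of the first input
// viewed as v4f32, so it can be moved with a single 32-bit lane copy.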
8719
8720 for (int Part = 0; Part < 4; ++Part) {
8721 // Does this part look like a mov
8722 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8723 if (Elt != -1) {
8724 SDValue Input = Op->getOperand(0);
8725 if (Elt >= 4) {
8726 Input = Op->getOperand(1);
8727 Elt -= 4;
8728 }
8729 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8730 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8731 DAG.getConstant(Elt, dl, MVT::i32));
8732 }
8733 }
8734
8735 // Nothing interesting found, just return
8736 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8737 return SDValue();
8738
8739 // The other parts need to be built with the old shuffle vector, cast to a
8740 // v4i32 and extract_vector_elts
8741 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8742 SmallVector<int, 16> NewShuffleMask;
8743 for (int Part = 0; Part < 4; ++Part)
8744 for (int i = 0; i < QuarterSize; i++)
8745 NewShuffleMask.push_back(
8746 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8747 SDValue NewShuffle = DAG.getVectorShuffle(
8748 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8749 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8750
8751 for (int Part = 0; Part < 4; ++Part)
8752 if (!Parts[Part])
8753 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8754 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8755 }
8756 // Build a vector out of the various parts and bitcast it back to the original
8757 // type.
8758 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8759 return DAG.getBitcast(VT, NewVec);
8760}
8761
8762static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8763 ArrayRef<int> ShuffleMask,
8764 SelectionDAG &DAG) {
8765 SDValue V1 = Op.getOperand(0);
8766 SDValue V2 = Op.getOperand(1);
8767 EVT VT = Op.getValueType();
8768 unsigned NumElts = VT.getVectorNumElements();
8769
8770 // A one-off identity mask is one that is mostly an identity mask from a
8771 // single source but contains a single element out-of-place, either from a
8772 // different vector or from another position in the same vector. As opposed to
8773 // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert
8774 // pair directly.
8775 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8776 int &OffElement) {
8777 OffElement = -1;
8778 int NonUndef = 0;
8779 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8780 if (Mask[i] == -1)
8781 continue;
8782 NonUndef++;
8783 if (Mask[i] != i + BaseOffset) {
8784 if (OffElement == -1)
8785 OffElement = i;
8786 else
8787 return false;
8788 }
8789 }
8790 return NonUndef > 2 && OffElement != -1;
8791 };
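// For example, the v4i32 mask <0,1,6,3> is an identity on V1 apart from lane
// 2, so it is lowered below as an EXTRACT_VECTOR_ELT of lane 2 from V2
// followed by an INSERT_VECTOR_ELT into lane 2 of V1.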
8792 int OffElement;
8793 SDValue VInput;
8794 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8795 VInput = V1;
8796 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8797 VInput = V2;
8798 else
8799 return SDValue();
8800
8801 SDLoc dl(Op);
8802 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8803 ? MVT::i32
8804 : VT.getScalarType();
8805 SDValue Elt = DAG.getNode(
8806 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8807 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8808 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8809 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8810 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8811}
8812
8813static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8814 const ARMSubtarget *ST) {
8815 SDValue V1 = Op.getOperand(0);
8816 SDValue V2 = Op.getOperand(1);
8817 SDLoc dl(Op);
8818 EVT VT = Op.getValueType();
8819 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8820 unsigned EltSize = VT.getScalarSizeInBits();
8821
8822 if (ST->hasMVEIntegerOps() && EltSize == 1)
8823 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8824
8825 // Convert shuffles that are directly supported on NEON to target-specific
8826 // DAG nodes, instead of keeping them as shuffles and matching them again
8827 // during code selection. This is more efficient and avoids the possibility
8828 // of inconsistencies between legalization and selection.
8829 // FIXME: floating-point vectors should be canonicalized to integer vectors
8830 // of the same type so that they get CSEd properly.
8831 ArrayRef<int> ShuffleMask = SVN->getMask();
8832
8833 if (EltSize <= 32) {
8834 if (SVN->isSplat()) {
8835 int Lane = SVN->getSplatIndex();
8836 // If this is undef splat, generate it via "just" vdup, if possible.
8837 if (Lane == -1) Lane = 0;
8838
8839 // Test if V1 is a SCALAR_TO_VECTOR.
8840 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8841 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8842 }
8843 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8844 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8845 // reaches it).
8846 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8847 !isa<ConstantSDNode>(V1.getOperand(0))) {
8848 bool IsScalarToVector = true;
8849 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8850 if (!V1.getOperand(i).isUndef()) {
8851 IsScalarToVector = false;
8852 break;
8853 }
8854 if (IsScalarToVector)
8855 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8856 }
8857 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8858 DAG.getConstant(Lane, dl, MVT::i32));
8859 }
8860
8861 bool ReverseVEXT = false;
8862 unsigned Imm = 0;
8863 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8864 if (ReverseVEXT)
8865 std::swap(V1, V2);
8866 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8867 DAG.getConstant(Imm, dl, MVT::i32));
8868 }
8869
8870 if (isVREVMask(ShuffleMask, VT, 64))
8871 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8872 if (isVREVMask(ShuffleMask, VT, 32))
8873 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8874 if (isVREVMask(ShuffleMask, VT, 16))
8875 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8876
8877 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8878 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8879 DAG.getConstant(Imm, dl, MVT::i32));
8880 }
8881
8882 // Check for Neon shuffles that modify both input vectors in place.
8883 // If both results are used, i.e., if there are two shuffles with the same
8884 // source operands and with masks corresponding to both results of one of
8885 // these operations, DAG memoization will ensure that a single node is
8886 // used for both shuffles.
8887 unsigned WhichResult = 0;
8888 bool isV_UNDEF = false;
8889 if (ST->hasNEON()) {
8890 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8891 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8892 if (isV_UNDEF)
8893 V2 = V1;
8894 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8895 .getValue(WhichResult);
8896 }
8897 }
8898 if (ST->hasMVEIntegerOps()) {
8899 if (isVMOVNMask(ShuffleMask, VT, false, false))
8900 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8901 DAG.getConstant(0, dl, MVT::i32));
8902 if (isVMOVNMask(ShuffleMask, VT, true, false))
8903 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8904 DAG.getConstant(1, dl, MVT::i32));
8905 if (isVMOVNMask(ShuffleMask, VT, true, true))
8906 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8907 DAG.getConstant(1, dl, MVT::i32));
8908 }
8909
8910 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8911 // shuffles that produce a result larger than their operands with:
8912 // shuffle(concat(v1, undef), concat(v2, undef))
8913 // ->
8914 // shuffle(concat(v1, v2), undef)
8915 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8916 //
8917 // This is useful in the general case, but there are special cases where
8918 // native shuffles produce larger results: the two-result ops.
8919 //
8920 // Look through the concat when lowering them:
8921 // shuffle(concat(v1, v2), undef)
8922 // ->
8923 // concat(VZIP(v1, v2):0, :1)
8924 //
8925 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8926 SDValue SubV1 = V1->getOperand(0);
8927 SDValue SubV2 = V1->getOperand(1);
8928 EVT SubVT = SubV1.getValueType();
8929
8930 // We expect these to have been canonicalized to -1.
8931 assert(llvm::all_of(ShuffleMask, [&](int i) {
8932 return i < (int)VT.getVectorNumElements();
8933 }) && "Unexpected shuffle index into UNDEF operand!");
8934
8935 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8936 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8937 if (isV_UNDEF)
8938 SubV2 = SubV1;
8939 assert((WhichResult == 0) &&
8940 "In-place shuffle of concat can only have one result!");
8941 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8942 SubV1, SubV2);
8943 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8944 Res.getValue(1));
8945 }
8946 }
8947 }
8948
8949 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8950 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8951 return V;
8952
8953 for (bool Top : {false, true}) {
8954 for (bool SingleSource : {false, true}) {
8955 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8956 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8957 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8958 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8959 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8960 SingleSource ? V1 : V2);
8961 if (Top) {
8962 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8963 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8964 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8965 }
8966 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8967 }
8968 }
8969 }
8970 }
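// For a "top" truncating mask the lanes of interest sit in the high half of
// each wide element, so both inputs are shifted right by EltSize (e.g. 16 for
// a v8i16 result) before the MVETRUNC combines them.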
8971
8972 // If the shuffle is not directly supported and it has 4 elements, use
8973 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8974 unsigned NumElts = VT.getVectorNumElements();
8975 if (NumElts == 4) {
8976 unsigned PFIndexes[4];
8977 for (unsigned i = 0; i != 4; ++i) {
8978 if (ShuffleMask[i] < 0)
8979 PFIndexes[i] = 8;
8980 else
8981 PFIndexes[i] = ShuffleMask[i];
8982 }
8983
8984 // Compute the index in the perfect shuffle table.
8985 unsigned PFTableIndex =
8986 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8987 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8988 unsigned Cost = (PFEntry >> 30);
8989
8990 if (Cost <= 4) {
8991 if (ST->hasNEON())
8992 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8993 else if (isLegalMVEShuffleOp(PFEntry)) {
8994 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8995 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8996 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8997 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8998 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8999 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9000 }
9001 }
9002 }
9003
9004 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
9005 if (EltSize >= 32) {
9006 // Do the expansion with floating-point types, since that is what the VFP
9007 // registers are defined to use, and since i64 is not legal.
9008 EVT EltVT = EVT::getFloatingPointVT(EltSize);
9009 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
9010 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
9011 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
9012 SmallVector<SDValue, 8> Ops;
9013 for (unsigned i = 0; i < NumElts; ++i) {
9014 if (ShuffleMask[i] < 0)
9015 Ops.push_back(DAG.getUNDEF(EltVT));
9016 else
9017 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
9018 ShuffleMask[i] < (int)NumElts ? V1 : V2,
9019 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
9020 dl, MVT::i32)));
9021 }
9022 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
9023 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
9024 }
9025
9026 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9027 isReverseMask(ShuffleMask, VT))
9028 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9029
9030 if (ST->hasNEON() && VT == MVT::v8i8)
9031 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9032 return NewOp;
9033
9034 if (ST->hasMVEIntegerOps())
9035 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9036 return NewOp;
9037
9038 return SDValue();
9039}
9040
9041static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9042 const ARMSubtarget *ST) {
9043 EVT VecVT = Op.getOperand(0).getValueType();
9044 SDLoc dl(Op);
9045
9046 assert(ST->hasMVEIntegerOps() &&
9047 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9048
9049 SDValue Conv =
9050 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9051 unsigned Lane = Op.getConstantOperandVal(2);
9052 unsigned LaneWidth =
9053 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9054 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9055 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9056 Op.getOperand(1), DAG.getValueType(MVT::i1));
9057 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9058 DAG.getConstant(~Mask, dl, MVT::i32));
9059 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9060}
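// For example, inserting into lane 2 of a v4i1 predicate sign-extends the
// scalar from i1 and BFIs it into the 4-bit field at bits [11:8] of the i32
// that holds the 16-bit predicate value.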
9061
9062SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9063 SelectionDAG &DAG) const {
9064 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9065 SDValue Lane = Op.getOperand(2);
9066 if (!isa<ConstantSDNode>(Lane))
9067 return SDValue();
9068
9069 SDValue Elt = Op.getOperand(1);
9070 EVT EltVT = Elt.getValueType();
9071
9072 if (Subtarget->hasMVEIntegerOps() &&
9073 Op.getValueType().getScalarSizeInBits() == 1)
9074 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9075
9076 if (getTypeAction(*DAG.getContext(), EltVT) ==
9077 TargetLowering::TypePromoteFloat) {
9078 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9079 // but the type system will try to do that if we don't intervene.
9080 // Reinterpret any such vector-element insertion as one with the
9081 // corresponding integer types.
9082
9083 SDLoc dl(Op);
9084
9085 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9086 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9087 TargetLowering::TypePromoteFloat);
9088
9089 SDValue VecIn = Op.getOperand(0);
9090 EVT VecVT = VecIn.getValueType();
9091 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9092 VecVT.getVectorNumElements());
9093
9094 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9095 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9096 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9097 IVecIn, IElt, Lane);
9098 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9099 }
9100
9101 return Op;
9102}
9103
9104static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9105 const ARMSubtarget *ST) {
9106 EVT VecVT = Op.getOperand(0).getValueType();
9107 SDLoc dl(Op);
9108
9109 assert(ST->hasMVEIntegerOps() &&
9110 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9111
9112 SDValue Conv =
9113 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9114 unsigned Lane = Op.getConstantOperandVal(1);
9115 unsigned LaneWidth =
9116 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9117 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9118 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9119 return Shift;
9120}
9121
9122static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9123 const ARMSubtarget *ST) {
9124 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9125 SDValue Lane = Op.getOperand(1);
9126 if (!isa<ConstantSDNode>(Lane))
9127 return SDValue();
9128
9129 SDValue Vec = Op.getOperand(0);
9130 EVT VT = Vec.getValueType();
9131
9132 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9133 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9134
9135 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9136 SDLoc dl(Op);
9137 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9138 }
9139
9140 return Op;
9141}
9142
9143static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9144 const ARMSubtarget *ST) {
9145 SDLoc dl(Op);
9146 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9147 "Unexpected custom CONCAT_VECTORS lowering");
9149 "Unexpected custom CONCAT_VECTORS lowering");
9150 assert(ST->hasMVEIntegerOps() &&
9151 "CONCAT_VECTORS lowering only supported for MVE");
9152
9153 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9154 EVT Op1VT = V1.getValueType();
9155 EVT Op2VT = V2.getValueType();
9156 assert(Op1VT == Op2VT && "Operand types don't match!");
9157 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9158 "Unexpected i1 concat operations!");
9159 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9160
9161 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9162 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9163
9164 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9165 // promoted to v8i16, etc.
9166 MVT ElType =
9167 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9168 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9169
9170 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9171 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9172 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9173 // ConcatVT.
9174 SDValue ConVec =
9175 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9176 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9177 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9178 }
9179
9180 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9181 // to be the right size for the destination. For example, if Op1 is v4i1
9182 // then the promoted vector is v4i32. The result of concatenation gives a
9183 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9184 // needs truncating to i16 and inserting in the result.
9185 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9186 EVT NewVT = NewV.getValueType();
9187 EVT ConcatVT = ConVec.getValueType();
9188 unsigned ExtScale = 1;
9189 if (NewVT == MVT::v2f64) {
9190 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9191 ExtScale = 2;
9192 }
9193 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9194 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9195 DAG.getIntPtrConstant(i * ExtScale, dl));
9196 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9197 DAG.getConstant(j, dl, MVT::i32));
9198 }
9199 return ConVec;
9200 };
9201 unsigned j = 0;
9202 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9203 ConVec = ExtractInto(NewV1, ConVec, j);
9204 ConVec = ExtractInto(NewV2, ConVec, j);
9205
9206 // Now return the result of comparing the subvector with zero, which will
9207 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9208 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9209 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9210 };
9211
9212 // Concat each pair of subvectors and pack into the lower half of the array.
9213 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9214 while (ConcatOps.size() > 1) {
9215 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9216 SDValue V1 = ConcatOps[I];
9217 SDValue V2 = ConcatOps[I + 1];
9218 ConcatOps[I / 2] = ConcatPair(V1, V2);
9219 }
9220 ConcatOps.resize(ConcatOps.size() / 2);
9221 }
9222 return ConcatOps[0];
9223}
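// For example, concatenating four v4i1 operands proceeds pairwise: each v4i1
// pair is promoted, packed into a v8i16 and compared against zero to give a
// v8i1, and the two v8i1 results are then combined the same way into the
// final v16i1.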
9224
9225static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9226 const ARMSubtarget *ST) {
9227 EVT VT = Op->getValueType(0);
9228 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9229 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9230
9231 // The only time a CONCAT_VECTORS operation can have legal types is when
9232 // two 64-bit vectors are concatenated to a 128-bit vector.
9233 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9234 "unexpected CONCAT_VECTORS");
9235 SDLoc dl(Op);
9236 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9237 SDValue Op0 = Op.getOperand(0);
9238 SDValue Op1 = Op.getOperand(1);
9239 if (!Op0.isUndef())
9240 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9241 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9242 DAG.getIntPtrConstant(0, dl));
9243 if (!Op1.isUndef())
9244 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9245 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9246 DAG.getIntPtrConstant(1, dl));
9247 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9248}
9249
9250static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9251 const ARMSubtarget *ST) {
9252 SDValue V1 = Op.getOperand(0);
9253 SDValue V2 = Op.getOperand(1);
9254 SDLoc dl(Op);
9255 EVT VT = Op.getValueType();
9256 EVT Op1VT = V1.getValueType();
9257 unsigned NumElts = VT.getVectorNumElements();
9258 unsigned Index = V2->getAsZExtVal();
9259
9260 assert(VT.getScalarSizeInBits() == 1 &&
9261 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9262 assert(ST->hasMVEIntegerOps() &&
9263 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9264
9265 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9266
9267 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9268 // promoted to v8i16, etc.
9269
9271
9272 if (NumElts == 2) {
9273 EVT SubVT = MVT::v4i32;
9274 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9275 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9276 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9277 DAG.getIntPtrConstant(i, dl));
9278 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9279 DAG.getConstant(j, dl, MVT::i32));
9280 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9281 DAG.getConstant(j + 1, dl, MVT::i32));
9282 }
9283 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9284 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9285 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9286 }
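// (Each element is inserted twice above so that both i32 halves of an i64
// lane agree; the v4i1 compare result can then be PREDICATE_CAST to v2i1.)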
9287
9288 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9289 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9290 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9291 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9292 DAG.getIntPtrConstant(i, dl));
9293 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9294 DAG.getConstant(j, dl, MVT::i32));
9295 }
9296
9297 // Now return the result of comparing the subvector with zero,
9298 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9299 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9300 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9301}
9302
9303// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9304static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9305 const ARMSubtarget *ST) {
9306 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9307 EVT VT = N->getValueType(0);
9308 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9309 "Expected a vector i1 type!");
9310 SDValue Op = N->getOperand(0);
9311 EVT FromVT = Op.getValueType();
9312 SDLoc DL(N);
9313
9314 SDValue And =
9315 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9316 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9317 DAG.getCondCode(ISD::SETNE));
9318}
9319
9320static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9321 const ARMSubtarget *Subtarget) {
9322 if (!Subtarget->hasMVEIntegerOps())
9323 return SDValue();
9324
9325 EVT ToVT = N->getValueType(0);
9326 if (ToVT.getScalarType() == MVT::i1)
9327 return LowerTruncatei1(N, DAG, Subtarget);
9328
9329 // MVE does not have a single instruction to perform the truncation of a v4i32
9330 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9331 // Most of the instructions in MVE follow the 'Beats' system, where moving
9332 // values from different lanes is usually something that the instructions
9333 // avoid.
9334 //
9335 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9336 // which take the top/bottom half of a larger lane and extend it (or do the
9337 // opposite, truncating into the top/bottom lane from a larger lane). Note
9338 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9339 // bottom 16 bits from each vector lane. This works really well with T/B
9340 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9341 // to be reordered.
9342 //
9343 // But truncates and sext/zext are always going to be fairly common from llvm.
9344 // We have several options for how to deal with them:
9345 // - Wherever possible combine them into an instruction that makes them
9346 // "free". This includes loads/stores, which can perform the trunc as part
9347 // of the memory operation. Or certain shuffles that can be turned into
9348 // VMOVN/VMOVL.
9349 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9350 // trunc(mul(sext(a), sext(b))) may become
9351 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9352 // this case can use VMULL). This is performed in the
9353 // MVELaneInterleavingPass.
9354 // - Otherwise we have an option. By default we would expand the
9355 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9356 // registers. One for each vector lane in the vector. This can obviously be
9357 // very expensive.
9358 // - The other option is to use the fact that loads/store can extend/truncate
9359 // to turn a trunc into two truncating stack stores and a stack reload. This
9360 // becomes 3 back-to-back memory operations, but at least that is less than
9361 // all the insert/extracts.
9362 //
9363 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9364 // are either optimized where they can be, or eventually lowered into stack
9365 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9366 // too early, where other instructions would be better, and stops us from
9367 // having to reconstruct multiple buildvector shuffles into loads/stores.
9368 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9369 return SDValue();
9370 EVT FromVT = N->getOperand(0).getValueType();
9371 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9372 return SDValue();
9373
9374 SDValue Lo, Hi;
9375 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9376 SDLoc DL(N);
9377 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9378}
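// For example, a trunc of v8i32 to v8i16 is split into its two v4i32 halves
// and emitted as a single MVETRUNC node, which later combines can fold or, as
// a last resort, expand through the stack as described above.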
9379
9380static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9381 const ARMSubtarget *Subtarget) {
9382 if (!Subtarget->hasMVEIntegerOps())
9383 return SDValue();
9384
9385 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9386
9387 EVT ToVT = N->getValueType(0);
9388 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9389 return SDValue();
9390 SDValue Op = N->getOperand(0);
9391 EVT FromVT = Op.getValueType();
9392 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9393 return SDValue();
9394
9395 SDLoc DL(N);
9396 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9397 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9398 ExtVT = MVT::v8i16;
9399
9400 unsigned Opcode =
9401 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9402 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9403 SDValue Ext1 = Ext.getValue(1);
9404
9405 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9406 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9407 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9408 }
9409
9410 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9411}
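// For example, a sext of v16i8 to v16i16 becomes one two-result MVE extend
// node producing the low and high v8i16 halves, which are then concatenated;
// the v16i8 to v16i32 case re-extends each v8i16 half to v8i32 first.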
9412
9413/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9414/// element has been zero/sign-extended, depending on the isSigned parameter,
9415/// from an integer type half its size.
9416static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9417 bool isSigned) {
9418 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9419 EVT VT = N->getValueType(0);
9420 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9421 SDNode *BVN = N->getOperand(0).getNode();
9422 if (BVN->getValueType(0) != MVT::v4i32 ||
9423 BVN->getOpcode() != ISD::BUILD_VECTOR)
9424 return false;
9425 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9426 unsigned HiElt = 1 - LoElt;
9427 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9428 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9429 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9430 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9431 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9432 return false;
9433 if (isSigned) {
9434 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9435 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9436 return true;
9437 } else {
9438 if (Hi0->isZero() && Hi1->isZero())
9439 return true;
9440 }
9441 return false;
9442 }
9443
9444 if (N->getOpcode() != ISD::BUILD_VECTOR)
9445 return false;
9446
9447 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9448 SDNode *Elt = N->getOperand(i).getNode();
9449 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9450 unsigned EltSize = VT.getScalarSizeInBits();
9451 unsigned HalfSize = EltSize / 2;
9452 if (isSigned) {
9453 if (!isIntN(HalfSize, C->getSExtValue()))
9454 return false;
9455 } else {
9456 if (!isUIntN(HalfSize, C->getZExtValue()))
9457 return false;
9458 }
9459 continue;
9460 }
9461 return false;
9462 }
9463
9464 return true;
9465}
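// For example, a v4i32 BUILD_VECTOR of <100, -3, 7, 0> counts as sign-extended
// (every element fits in an i16), so a multiply against it can take the signed
// VMULL path in LowerMUL below.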
9466
9467/// isSignExtended - Check if a node is a vector value that is sign-extended
9468/// or a constant BUILD_VECTOR with sign-extended elements.
9469static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9470 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9471 return true;
9472 if (isExtendedBUILD_VECTOR(N, DAG, true))
9473 return true;
9474 return false;
9475}
9476
9477/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9478/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9479static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9480 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9481 ISD::isZEXTLoad(N))
9482 return true;
9483 if (isExtendedBUILD_VECTOR(N, DAG, false))
9484 return true;
9485 return false;
9486}
9487
9488static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9489 if (OrigVT.getSizeInBits() >= 64)
9490 return OrigVT;
9491
9492 assert(OrigVT.isSimple() && "Expecting a simple value type");
9493
9494 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9495 switch (OrigSimpleTy) {
9496 default: llvm_unreachable("Unexpected Vector Type");
9497 case MVT::v2i8:
9498 case MVT::v2i16:
9499 return MVT::v2i32;
9500 case MVT::v4i8:
9501 return MVT::v4i16;
9502 }
9503}
9504
9505/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9506/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9507/// We insert the required extension here to get the vector to fill a D register.
9508static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9509 const EVT &OrigTy,
9510 const EVT &ExtTy,
9511 unsigned ExtOpcode) {
9512 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9513 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9514 // 64-bits we need to insert a new extension so that it will be 64-bits.
9515 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9516 if (OrigTy.getSizeInBits() >= 64)
9517 return N;
9518
9519 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9520 EVT NewVT = getExtensionTo64Bits(OrigTy);
9521
9522 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9523}
9524
9525/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9526/// does not do any sign/zero extension. If the original vector is less
9527/// than 64 bits, an appropriate extension will be added after the load to
9528/// reach a total size of 64 bits. We have to add the extension separately
9529/// because ARM does not have a sign/zero extending load for vectors.
9530static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9531 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9532
9533 // The load already has the right type.
9534 if (ExtendedTy == LD->getMemoryVT())
9535 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9536 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9537 LD->getMemOperand()->getFlags());
9538
9539 // We need to create a zextload/sextload. We cannot just create a load
9540 // followed by a zext/sext node because LowerMUL is also run during normal
9541 // operation legalization where we can't create illegal types.
9542 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9543 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9544 LD->getMemoryVT(), LD->getAlign(),
9545 LD->getMemOperand()->getFlags());
9546}
9547
9548/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9549/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9550/// the unextended value. The unextended vector should be 64 bits so that it can
9551/// be used as an operand to a VMULL instruction. If the original vector size
9552/// before extension is less than 64 bits we add an extension to resize
9553/// the vector to 64 bits.
9554static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9555 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9556 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9557 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9558 N->getOperand(0)->getValueType(0),
9559 N->getValueType(0),
9560 N->getOpcode());
9561
9562 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9563 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9564 "Expected extending load");
9565
9566 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9567 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9568 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9569 SDValue extLoad =
9570 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9571 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9572
9573 return newLoad;
9574 }
9575
9576 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9577 // have been legalized as a BITCAST from v4i32.
9578 if (N->getOpcode() == ISD::BITCAST) {
9579 SDNode *BVN = N->getOperand(0).getNode();
9580 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9581 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9582 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9583 return DAG.getBuildVector(
9584 MVT::v2i32, SDLoc(N),
9585 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9586 }
9587 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9588 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9589 EVT VT = N->getValueType(0);
9590 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9591 unsigned NumElts = VT.getVectorNumElements();
9592 MVT TruncVT = MVT::getIntegerVT(EltSize);
9593 SmallVector<SDValue, 8> Ops;
9594 SDLoc dl(N);
9595 for (unsigned i = 0; i != NumElts; ++i) {
9596 const APInt &CInt = N->getConstantOperandAPInt(i);
9597 // Element types smaller than 32 bits are not legal, so use i32 elements.
9598 // The values are implicitly truncated so sext vs. zext doesn't matter.
9599 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9600 }
9601 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9602}
9603
9604static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9605 unsigned Opcode = N->getOpcode();
9606 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9607 SDNode *N0 = N->getOperand(0).getNode();
9608 SDNode *N1 = N->getOperand(1).getNode();
9609 return N0->hasOneUse() && N1->hasOneUse() &&
9610 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9611 }
9612 return false;
9613}
9614
9615static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9616 unsigned Opcode = N->getOpcode();
9617 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9618 SDNode *N0 = N->getOperand(0).getNode();
9619 SDNode *N1 = N->getOperand(1).getNode();
9620 return N0->hasOneUse() && N1->hasOneUse() &&
9621 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9622 }
9623 return false;
9624}
9625
9626static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9627 // Multiplications are only custom-lowered for 128-bit vectors so that
9628 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9629 EVT VT = Op.getValueType();
9630 assert(VT.is128BitVector() && VT.isInteger() &&
9631 "unexpected type for custom-lowering ISD::MUL");
9632 SDNode *N0 = Op.getOperand(0).getNode();
9633 SDNode *N1 = Op.getOperand(1).getNode();
9634 unsigned NewOpc = 0;
9635 bool isMLA = false;
9636 bool isN0SExt = isSignExtended(N0, DAG);
9637 bool isN1SExt = isSignExtended(N1, DAG);
9638 if (isN0SExt && isN1SExt)
9639 NewOpc = ARMISD::VMULLs;
9640 else {
9641 bool isN0ZExt = isZeroExtended(N0, DAG);
9642 bool isN1ZExt = isZeroExtended(N1, DAG);
9643 if (isN0ZExt && isN1ZExt)
9644 NewOpc = ARMISD::VMULLu;
9645 else if (isN1SExt || isN1ZExt) {
9646 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9647 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9648 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9649 NewOpc = ARMISD::VMULLs;
9650 isMLA = true;
9651 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9652 NewOpc = ARMISD::VMULLu;
9653 isMLA = true;
9654 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9655 std::swap(N0, N1);
9656 NewOpc = ARMISD::VMULLu;
9657 isMLA = true;
9658 }
9659 }
9660
9661 if (!NewOpc) {
9662 if (VT == MVT::v2i64)
9663 // Fall through to expand this. It is not legal.
9664 return SDValue();
9665 else
9666 // Other vector multiplications are legal.
9667 return Op;
9668 }
9669 }
9670
9671 // Legalize to a VMULL instruction.
9672 SDLoc DL(Op);
9673 SDValue Op0;
9674 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9675 if (!isMLA) {
9676 Op0 = SkipExtensionForVMULL(N0, DAG);
9677 assert(Op0.getValueType().is64BitVector() &&
9678 Op1.getValueType().is64BitVector() &&
9679 "unexpected types for extended operands to VMULL");
9680 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9681 }
9682
9683 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9684 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9685 // vmull q0, d4, d6
9686 // vmlal q0, d5, d6
9687 // is faster than
9688 // vaddl q0, d4, d5
9689 // vmovl q1, d6
9690 // vmul q0, q0, q1
9691 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9692 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9693 EVT Op1VT = Op1.getValueType();
9694 return DAG.getNode(N0->getOpcode(), DL, VT,
9695 DAG.getNode(NewOpc, DL, VT,
9696 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9697 DAG.getNode(NewOpc, DL, VT,
9698 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9699}
9700
9701static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9702 SelectionDAG &DAG) {
9703 // TODO: Should this propagate fast-math-flags?
9704
9705 // Convert to float
9706 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9707 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9708 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9709 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9710 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9711 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9712 // Get reciprocal estimate.
9713 // float4 recip = vrecpeq_f32(yf);
9714 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9715 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9716 Y);
9717 // Because char has a smaller range than uchar, we can actually get away
9718 // without any newton steps. This requires that we use a weird bias
9719 // of 0xb000, however (again, this has been exhaustively tested).
9720 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9721 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9722 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9723 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9724 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9725 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9726 // Convert back to short.
9727 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9728 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9729 return X;
9730}
9731
9732static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9733 SelectionDAG &DAG) {
9734 // TODO: Should this propagate fast-math-flags?
9735
9736 SDValue N2;
9737 // Convert to float.
9738 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9739 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9740 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9741 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9742 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9743 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9744
9745 // Use reciprocal estimate and one refinement step.
9746 // float4 recip = vrecpeq_f32(yf);
9747 // recip *= vrecpsq_f32(yf, recip);
9748 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9749 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9750 N1);
9751 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9752 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9753 N1, N2);
9754 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9755 // Because short has a smaller range than ushort, we can actually get away
9756 // with only a single newton step. This requires that we use a weird bias
9757 // of 0x89, however (again, this has been exhaustively tested).
9758 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9759 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9760 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9761 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9762 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9763 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9764 // Convert back to integer and return.
9765 // return vmovn_s32(vcvt_s32_f32(result));
9766 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9767 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9768 return N0;
9769}
9770
9771static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9772 const ARMSubtarget *ST) {
9773 EVT VT = Op.getValueType();
9774 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9775 "unexpected type for custom-lowering ISD::SDIV");
9776
9777 SDLoc dl(Op);
9778 SDValue N0 = Op.getOperand(0);
9779 SDValue N1 = Op.getOperand(1);
9780 SDValue N2, N3;
9781
9782 if (VT == MVT::v8i8) {
9783 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9784 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9785
9786 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9787 DAG.getIntPtrConstant(4, dl));
9788 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9789 DAG.getIntPtrConstant(4, dl));
9790 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9791 DAG.getIntPtrConstant(0, dl));
9792 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9793 DAG.getIntPtrConstant(0, dl));
9794
9795 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9796 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9797
9798 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9799 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9800
9801 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9802 return N0;
9803 }
9804 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9805}
9806
9807static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9808 const ARMSubtarget *ST) {
9809 // TODO: Should this propagate fast-math-flags?
9810 EVT VT = Op.getValueType();
9811 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9812 "unexpected type for custom-lowering ISD::UDIV");
9813
9814 SDLoc dl(Op);
9815 SDValue N0 = Op.getOperand(0);
9816 SDValue N1 = Op.getOperand(1);
9817 SDValue N2, N3;
9818
9819 if (VT == MVT::v8i8) {
9820 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9821 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9822
9823 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9824 DAG.getIntPtrConstant(4, dl));
9825 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9826 DAG.getIntPtrConstant(4, dl));
9827 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9828 DAG.getIntPtrConstant(0, dl));
9829 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9830 DAG.getIntPtrConstant(0, dl));
9831
9832 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9833 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9834
9835 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9836 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9837
9838 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9839 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9840 MVT::i32),
9841 N0);
9842 return N0;
9843 }
9844
9845 // v4i16 udiv ... Convert to float.
9846 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9847 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9848 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9849 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9850 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9851 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9852
9853 // Use reciprocal estimate and two refinement steps.
9854 // float4 recip = vrecpeq_f32(yf);
9855 // recip *= vrecpsq_f32(yf, recip);
9856 // recip *= vrecpsq_f32(yf, recip);
9857 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9858 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9859 BN1);
9860 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9861 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9862 BN1, N2);
9863 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9864 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9865 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9866 BN1, N2);
9867 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9868 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9869 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9870 // and that it will never cause us to return an answer too large).
9871 // float4 result = as_float4(as_int4(xf*recip) + 2);
9872 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9873 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9874 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9875 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9876 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9877 // Convert back to integer and return.
9878 // return vmovn_u32(vcvt_s32_f32(result));
9879 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9880 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9881 return N0;
9882}
9883
9884 static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9885 SDNode *N = Op.getNode();
9886 EVT VT = N->getValueType(0);
9887 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9888
9889 SDValue Carry = Op.getOperand(2);
9890
9891 SDLoc DL(Op);
9892
9893 SDValue Result;
9894 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9895 // This converts the boolean value carry into the carry flag.
9896 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9897
9898 // Do the addition proper using the carry flag we wanted.
9899 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9900 Op.getOperand(1), Carry);
9901
9902 // Now convert the carry flag into a boolean value.
9903 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9904 } else {
9905 // ARMISD::SUBE expects a carry rather than the borrow that ISD::USUBO_CARRY
9906 // provides, so we have to invert the incoming value first.
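// Worked example of the convention difference (for exposition only): when
// computing 0 - 1 with borrow-in 0, ISD::USUBO_CARRY expects borrow-out 1,
// while ARM's SUBS leaves C = 0 (the carry flag is the inverted borrow),
// hence the 1 - C conversions on both sides of the SUBE.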
9907 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9908 DAG.getConstant(1, DL, MVT::i32), Carry);
9909 // This converts the boolean value carry into the carry flag.
9910 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9911
9912 // Do the subtraction proper using the carry flag we wanted.
9913 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9914 Op.getOperand(1), Carry);
9915
9916 // Now convert the carry flag into a boolean value.
9917 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9918 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9919 // by ISD::USUBO_CARRY, so compute 1 - C.
9920 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9921 DAG.getConstant(1, DL, MVT::i32), Carry);
9922 }
9923
9924 // Return both values.
9925 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9926}
9927
9928SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9929 assert(Subtarget->isTargetDarwin());
9930
9931 // For iOS, we want to call an alternative entry point: __sincos_stret,
9932 // whose return values are passed via sret.
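// At the C level the entry point behaves roughly like
//   struct { float sinval, cosval; } __sincos_stret(float);  // f64 variant likewise
// (a sketch for illustration only); under the APCS ABI the aggregate result is
// returned indirectly through the sret pointer that is set up below.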
9933 SDLoc dl(Op);
9934 SDValue Arg = Op.getOperand(0);
9935 EVT ArgVT = Arg.getValueType();
9936 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9937 auto PtrVT = getPointerTy(DAG.getDataLayout());
9938
9939 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9940 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9941
9942 // Pair of floats / doubles used to pass the result.
9943 Type *RetTy = StructType::get(ArgTy, ArgTy);
9944 auto &DL = DAG.getDataLayout();
9945
9946 ArgListTy Args;
9947 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9948 SDValue SRet;
9949 if (ShouldUseSRet) {
9950 // Create stack object for sret.
9951 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9952 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9953 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9954 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9955
9956 ArgListEntry Entry;
9957 Entry.Node = SRet;
9958 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9959 Entry.IsSExt = false;
9960 Entry.IsZExt = false;
9961 Entry.IsSRet = true;
9962 Args.push_back(Entry);
9963 RetTy = Type::getVoidTy(*DAG.getContext());
9964 }
9965
9966 ArgListEntry Entry;
9967 Entry.Node = Arg;
9968 Entry.Ty = ArgTy;
9969 Entry.IsSExt = false;
9970 Entry.IsZExt = false;
9971 Args.push_back(Entry);
9972
9973 RTLIB::Libcall LC =
9974 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9975 const char *LibcallName = getLibcallName(LC);
9976 CallingConv::ID CC = getLibcallCallingConv(LC);
9977 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9978
9979 TargetLowering::CallLoweringInfo CLI(DAG);
9980 CLI.setDebugLoc(dl)
9981 .setChain(DAG.getEntryNode())
9982 .setCallee(CC, RetTy, Callee, std::move(Args))
9983 .setDiscardResult(ShouldUseSRet);
9984 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9985
9986 if (!ShouldUseSRet)
9987 return CallResult.first;
9988
9989 SDValue LoadSin =
9990 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9991
9992 // Address of cos field.
9993 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9994 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9995 SDValue LoadCos =
9996 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9997
9998 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9999 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
10000 LoadSin.getValue(0), LoadCos.getValue(0));
10001}
10002
10003SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
10004 bool Signed,
10005 SDValue &Chain) const {
10006 EVT VT = Op.getValueType();
10007 assert((VT == MVT::i32 || VT == MVT::i64) &&
10008 "unexpected type for custom lowering DIV");
10009 SDLoc dl(Op);
10010
10011 const auto &DL = DAG.getDataLayout();
10012 const auto &TLI = DAG.getTargetLoweringInfo();
10013
10014 const char *Name = nullptr;
10015 if (Signed)
10016 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
10017 else
10018 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
10019
10020 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
10021
10022 ArgListTy Args;
10023
10024 for (auto AI : {1, 0}) {
10025 ArgListEntry Arg;
10026 Arg.Node = Op.getOperand(AI);
10027 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10028 Args.push_back(Arg);
10029 }
10030
10031 CallLoweringInfo CLI(DAG);
10032 CLI.setDebugLoc(dl)
10033 .setChain(Chain)
10034 .setCallee(CallingConv::ARM_AAPCS, VT.getTypeForEVT(*DAG.getContext()),
10035 ES, std::move(Args));
10036
10037 return LowerCallTo(CLI).first;
10038}
10039
10040 // This is a code size optimisation: return the original SDIV node to
10041 // DAGCombiner when we don't want to expand SDIV into a sequence of
10042 // instructions, and an empty SDValue otherwise, which causes the
10043 // SDIV to be expanded in DAGCombine.
10044SDValue
10045ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10046 SelectionDAG &DAG,
10047 SmallVectorImpl<SDNode *> &Created) const {
10048 // TODO: Support SREM
10049 if (N->getOpcode() != ISD::SDIV)
10050 return SDValue();
10051
10052 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10053 const bool MinSize = ST.hasMinSize();
10054 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10055 : ST.hasDivideInARMMode();
10056
10057 // Don't touch vector types; rewriting this may lead to scalarizing
10058 // the int divs.
10059 if (N->getOperand(0).getValueType().isVector())
10060 return SDValue();
10061
10062 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
10063 // hwdiv support for this to be really profitable.
10064 if (!(MinSize && HasDivide))
10065 return SDValue();
10066
10067 // ARM mode is a bit simpler than Thumb: we can handle large power
10068 // of 2 immediates with 1 mov instruction; no further checks required,
10069 // just return the sdiv node.
10070 if (!ST.isThumb())
10071 return SDValue(N, 0);
10072
10073 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10074 // and thus lose the code size benefits of a MOVS that requires only 2.
10075 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10076 // but since this check already does exactly that, it's not worth pulling in TTI.
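// For example (encoding sizes shown for illustration): "movs r1, #8" uses the
// 2-byte T1 encoding, while an immediate such as 256 no longer fits the 8-bit
// field and needs a 4-byte "movw", at which point keeping the sdiv sequence
// stops paying off at minsize.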
10077 if (Divisor.sgt(128))
10078 return SDValue();
10079
10080 return SDValue(N, 0);
10081}
10082
10083SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10084 bool Signed) const {
10085 assert(Op.getValueType() == MVT::i32 &&
10086 "unexpected type for custom lowering DIV");
10087 SDLoc dl(Op);
10088
10089 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10090 DAG.getEntryNode(), Op.getOperand(1));
10091
10092 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10093}
10094
10095 static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
10096 SDLoc DL(N);
10097 SDValue Op = N->getOperand(1);
10098 if (N->getValueType(0) == MVT::i32)
10099 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10100 SDValue Lo, Hi;
10101 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10102 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10103 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10104}
10105
10106void ARMTargetLowering::ExpandDIV_Windows(
10107 SDValue Op, SelectionDAG &DAG, bool Signed,
10108 SmallVectorImpl<SDValue> &Results) const {
10109 const auto &DL = DAG.getDataLayout();
10110 const auto &TLI = DAG.getTargetLoweringInfo();
10111
10112 assert(Op.getValueType() == MVT::i64 &&
10113 "unexpected type for custom lowering DIV");
10114 SDLoc dl(Op);
10115
10116 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10117
10118 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10119
10120 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10121 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10122 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10123 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10124
10125 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10126}
10127
10128 static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10129 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10130 EVT MemVT = LD->getMemoryVT();
10131 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10132 MemVT == MVT::v16i1) &&
10133 "Expected a predicate type!");
10134 assert(MemVT == Op.getValueType());
10135 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10136 "Expected a non-extending load");
10137 assert(LD->isUnindexed() && "Expected an unindexed load");
10138
10139 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16-bit
10140 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10141 // need to make sure that the 8/4/2 bits are actually loaded into the correct
10142 // place, which means loading the value and then shuffling the bits into
10143 // the bottom bits of the predicate.
10144 // Equally, VLDR for a v16i1 will actually load 32 bits (so would be incorrect
10145 // for BE).
10146 // On big-endian targets the rest of LLVM also assumes the reverse order of a
10147 // natural VMSR(load), so the loaded bits need to be reversed as well.
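// Illustrative sketch of the layout (assumed from the MVE predicate encoding):
// a v4i1 lives in VPR.P0 with lane i replicated into bits [4*i .. 4*i+3], so
// only bits 0, 4, 8 and 12 carry independent information; the PREDICATE_CAST
// plus EXTRACT_SUBVECTOR below recover the narrow predicate from the 16 loaded
// bits.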
10148
10149 SDLoc dl(Op);
10150 SDValue Load = DAG.getExtLoad(
10151 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10152 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10153 LD->getMemOperand());
10154 SDValue Val = Load;
10155 if (DAG.getDataLayout().isBigEndian())
10156 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10157 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10158 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10159 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10160 if (MemVT != MVT::v16i1)
10161 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10162 DAG.getConstant(0, dl, MVT::i32));
10163 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10164}
10165
10166void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10167 SelectionDAG &DAG) const {
10168 LoadSDNode *LD = cast<LoadSDNode>(N);
10169 EVT MemVT = LD->getMemoryVT();
10170 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10171
10172 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10173 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10174 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10175 SDLoc dl(N);
10176 SDValue Result = DAG.getMemIntrinsicNode(
10177 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10178 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10179 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10180 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10181 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10182 Results.append({Pair, Result.getValue(2)});
10183 }
10184}
10185
10186 static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10187 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10188 EVT MemVT = ST->getMemoryVT();
10189 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10190 MemVT == MVT::v16i1) &&
10191 "Expected a predicate type!");
10192 assert(MemVT == ST->getValue().getValueType());
10193 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10194 assert(ST->isUnindexed() && "Expected an unindexed store");
10195
10196 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10197 // top bits unset and a scalar store.
10198 SDLoc dl(Op);
10199 SDValue Build = ST->getValue();
10200 if (MemVT != MVT::v16i1) {
10201 SmallVector<SDValue, 16> Ops;
10202 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10203 unsigned Elt = DAG.getDataLayout().isBigEndian()
10204 ? MemVT.getVectorNumElements() - I - 1
10205 : I;
10206 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10207 DAG.getConstant(Elt, dl, MVT::i32)));
10208 }
10209 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10210 Ops.push_back(DAG.getUNDEF(MVT::i32));
10211 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10212 }
10213 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10214 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10215 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10216 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10217 DAG.getConstant(16, dl, MVT::i32));
10218 return DAG.getTruncStore(
10219 ST->getChain(), dl, GRP, ST->getBasePtr(),
10220 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10221 ST->getMemOperand());
10222}
10223
10224 static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10225 const ARMSubtarget *Subtarget) {
10226 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10227 EVT MemVT = ST->getMemoryVT();
10228 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10229
10230 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10231 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10232 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10233 SDNode *N = Op.getNode();
10234 SDLoc dl(N);
10235
10236 SDValue Lo = DAG.getNode(
10237 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10238 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10239 MVT::i32));
10240 SDValue Hi = DAG.getNode(
10241 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10242 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10243 MVT::i32));
10244
10245 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10246 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10247 MemVT, ST->getMemOperand());
10248 } else if (Subtarget->hasMVEIntegerOps() &&
10249 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10250 MemVT == MVT::v16i1))) {
10251 return LowerPredicateStore(Op, DAG);
10252 }
10253
10254 return SDValue();
10255}
10256
10257static bool isZeroVector(SDValue N) {
10258 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10259 (N->getOpcode() == ARMISD::VMOVIMM &&
10260 isNullConstant(N->getOperand(0))));
10261}
10262
10263 static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10264 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10265 MVT VT = Op.getSimpleValueType();
10266 SDValue Mask = N->getMask();
10267 SDValue PassThru = N->getPassThru();
10268 SDLoc dl(Op);
10269
10270 if (isZeroVector(PassThru))
10271 return Op;
10272
10273 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10274 // zero too, and other values are lowered to a select.
10275 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10276 DAG.getTargetConstant(0, dl, MVT::i32));
10277 SDValue NewLoad = DAG.getMaskedLoad(
10278 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10279 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10280 N->getExtensionType(), N->isExpandingLoad());
10281 SDValue Combo = NewLoad;
10282 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10283 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10284 isZeroVector(PassThru->getOperand(0));
10285 if (!PassThru.isUndef() && !PassThruIsCastZero)
10286 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10287 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10288}
10289
10290 static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10291 const ARMSubtarget *ST) {
10292 if (!ST->hasMVEIntegerOps())
10293 return SDValue();
10294
10295 SDLoc dl(Op);
10296 unsigned BaseOpcode = 0;
10297 switch (Op->getOpcode()) {
10298 default: llvm_unreachable("Expected VECREDUCE opcode");
10299 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10300 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10301 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10302 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10303 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10304 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10305 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10306 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10307 }
10308
10309 SDValue Op0 = Op->getOperand(0);
10310 EVT VT = Op0.getValueType();
10311 EVT EltVT = VT.getVectorElementType();
10312 unsigned NumElts = VT.getVectorNumElements();
10313 unsigned NumActiveLanes = NumElts;
10314
10315 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10316 NumActiveLanes == 2) &&
10317 "Only expected a power-of-2 vector size");
10318
10319 // Use Op(X, Rev(X)) with the base opcode until 4 items remain. Going down to
10320 // 4 vector elements allows us to easily extract vector elements from the lanes.
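// For instance (illustrative walk-through), reducing a v8i16: one round of
// Op(X, VREV32(X)) combines lanes {0,1}, {2,3}, {4,5}, {6,7} pairwise, leaving
// 4 meaningful lanes; the code after the loop then combines those 4 lanes with
// three scalar applications of the base opcode.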
10321 while (NumActiveLanes > 4) {
10322 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10323 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10324 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10325 NumActiveLanes /= 2;
10326 }
10327
10328 SDValue Res;
10329 if (NumActiveLanes == 4) {
10330 // The remaining 4 elements are combined pairwise and then merged, i.e. three
10331 // scalar applications of BaseOpcode.
10331 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10332 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10333 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10334 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10335 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10336 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10337 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10338 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10339 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10340 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10341 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10342 } else {
10343 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10344 DAG.getConstant(0, dl, MVT::i32));
10345 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10346 DAG.getConstant(1, dl, MVT::i32));
10347 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10348 }
10349
10350 // Result type may be wider than element type.
10351 if (EltVT != Op->getValueType(0))
10352 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10353 return Res;
10354}
10355
10356 static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10357 const ARMSubtarget *ST) {
10358 if (!ST->hasMVEFloatOps())
10359 return SDValue();
10360 return LowerVecReduce(Op, DAG, ST);
10361}
10362
10363 static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10364 const ARMSubtarget *ST) {
10365 if (!ST->hasNEON())
10366 return SDValue();
10367
10368 SDLoc dl(Op);
10369 SDValue Op0 = Op->getOperand(0);
10370 EVT VT = Op0.getValueType();
10371 EVT EltVT = VT.getVectorElementType();
10372
10373 unsigned PairwiseIntrinsic = 0;
10374 switch (Op->getOpcode()) {
10375 default:
10376 llvm_unreachable("Expected VECREDUCE opcode");
10377 case ISD::VECREDUCE_UMIN:
10378 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10379 break;
10380 case ISD::VECREDUCE_UMAX:
10381 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10382 break;
10383 case ISD::VECREDUCE_SMIN:
10384 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10385 break;
10386 case ISD::VECREDUCE_SMAX:
10387 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10388 break;
10389 }
10390 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10391
10392 unsigned NumElts = VT.getVectorNumElements();
10393 unsigned NumActiveLanes = NumElts;
10394
10395 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10396 NumActiveLanes == 2) &&
10397 "Only expected a power-of-2 vector size");
10398
10399 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
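// Illustrative example for a v4i32 umin reduction: after the split,
// vpmin(Lo, Hi) leaves 2 candidate lanes in one 64-bit vector, and one further
// vpmin(X, X) leaves the overall result in lane 0, which the final
// EXTRACT_VECTOR_ELT reads out.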
10400 if (VT.is128BitVector()) {
10401 SDValue Lo, Hi;
10402 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10403 VT = Lo.getValueType();
10404 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10405 NumActiveLanes /= 2;
10406 }
10407
10408 // Use pairwise reductions until one lane remains
10409 while (NumActiveLanes > 1) {
10410 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10411 NumActiveLanes /= 2;
10412 }
10413
10414 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10415 DAG.getConstant(0, dl, MVT::i32));
10416
10417 // Result type may be wider than element type.
10418 if (EltVT != Op.getValueType()) {
10419 unsigned Extend = 0;
10420 switch (Op->getOpcode()) {
10421 default:
10422 llvm_unreachable("Expected VECREDUCE opcode");
10423 case ISD::VECREDUCE_UMIN:
10424 case ISD::VECREDUCE_UMAX:
10425 Extend = ISD::ZERO_EXTEND;
10426 break;
10427 case ISD::VECREDUCE_SMIN:
10428 case ISD::VECREDUCE_SMAX:
10429 Extend = ISD::SIGN_EXTEND;
10430 break;
10431 }
10432 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10433 }
10434 return Res;
10435}
10436
10437 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10438 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10439 // Acquire/Release load/store is not legal for targets without a dmb or
10440 // equivalent available.
10441 return SDValue();
10442
10443 // Monotonic load/store is legal for all targets.
10444 return Op;
10445}
10446
10447 static void ReplaceREADCYCLECOUNTER(SDNode *N,
10448 SmallVectorImpl<SDValue> &Results,
10449 SelectionDAG &DAG,
10450 const ARMSubtarget *Subtarget) {
10451 SDLoc DL(N);
10452 // Under Power Management extensions, the cycle-count is:
10453 // mrc p15, #0, <Rt>, c9, c13, #0
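// The constants below are simply that instruction's fields in order
// (coproc = 15, opc1 = 0, CRn = c9, CRm = c13, opc2 = 0) handed to the arm_mrc
// intrinsic; the 32-bit PMCCNTR result is then widened to i64 further down by
// pairing it with a zero constant.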
10454 SDValue Ops[] = { N->getOperand(0), // Chain
10455 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10456 DAG.getTargetConstant(15, DL, MVT::i32),
10457 DAG.getTargetConstant(0, DL, MVT::i32),
10458 DAG.getTargetConstant(9, DL, MVT::i32),
10459 DAG.getTargetConstant(13, DL, MVT::i32),
10460 DAG.getTargetConstant(0, DL, MVT::i32)
10461 };
10462
10463 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10464 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10465 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10466 DAG.getConstant(0, DL, MVT::i32)));
10467 Results.push_back(Cycles32.getValue(1));
10468}
10469
10470 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10471 SDLoc dl(V.getNode());
10472 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10473 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10474 if (isBigEndian)
10475 std::swap (VLo, VHi);
10476 SDValue RegClass =
10477 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10478 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10479 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10480 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10481 return SDValue(
10482 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10483}
10484
10485 static void ReplaceCMP_SWAP_64Results(SDNode *N,
10486 SmallVectorImpl<SDValue> &Results,
10487 SelectionDAG &DAG) {
10488 assert(N->getValueType(0) == MVT::i64 &&
10489 "AtomicCmpSwap on types less than 64 should be legal");
10490 SDValue Ops[] = {N->getOperand(1),
10491 createGPRPairNode(DAG, N->getOperand(2)),
10492 createGPRPairNode(DAG, N->getOperand(3)),
10493 N->getOperand(0)};
10494 SDNode *CmpSwap = DAG.getMachineNode(
10495 ARM::CMP_SWAP_64, SDLoc(N),
10496 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10497
10498 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10499 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10500
10501 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10502
10503 SDValue Lo =
10504 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10505 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10506 SDValue Hi =
10507 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10508 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10509 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10510 Results.push_back(SDValue(CmpSwap, 2));
10511}
10512
10513SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10514 SDLoc dl(Op);
10515 EVT VT = Op.getValueType();
10516 SDValue Chain = Op.getOperand(0);
10517 SDValue LHS = Op.getOperand(1);
10518 SDValue RHS = Op.getOperand(2);
10519 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10520 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10521
10522 // If we don't have instructions of this float type then soften to a libcall
10523 // and use SETCC instead.
10524 if (isUnsupportedFloatingType(LHS.getValueType())) {
10525 softenSetCCOperands(
10526 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10527 if (!RHS.getNode()) {
10528 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10529 CC = ISD::SETNE;
10530 }
10531 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10532 DAG.getCondCode(CC));
10533 return DAG.getMergeValues({Result, Chain}, dl);
10534 }
10535
10536 ARMCC::CondCodes CondCode, CondCode2;
10537 FPCCToARMCC(CC, CondCode, CondCode2);
10538
10539 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10540 // in CMPFP and CMPFPE, but instead it should be made explicit by these
10541 // instructions using a chain instead of glue. This would also fix the problem
10542 // here (and also in LowerSELECT_CC) where we generate two comparisons when
10543 // CondCode2 != AL.
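// For example (illustrating the CondCode2 path): SETUEQ has no single ARM
// condition, so FPCCToARMCC returns EQ with VS as the second condition, and
// the second getCMOV below folds in the unordered case.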
10544 SDValue True = DAG.getConstant(1, dl, VT);
10545 SDValue False = DAG.getConstant(0, dl, VT);
10546 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10547 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10548 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10549 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10550 if (CondCode2 != ARMCC::AL) {
10551 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10552 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10553 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10554 }
10555 return DAG.getMergeValues({Result, Chain}, dl);
10556}
10557
10558SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10559 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10560
10561 EVT VT = getPointerTy(DAG.getDataLayout());
10562 SDLoc DL(Op);
10563 int FI = MFI.CreateFixedObject(4, 0, false);
10564 return DAG.getFrameIndex(FI, VT);
10565}
10566
10567 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10568 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10569 switch (Op.getOpcode()) {
10570 default: llvm_unreachable("Don't know how to custom lower this!");
10571 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10572 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10573 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10574 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10575 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10576 case ISD::SELECT: return LowerSELECT(Op, DAG);
10577 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10578 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10579 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10580 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10581 case ISD::VASTART: return LowerVASTART(Op, DAG);
10582 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10583 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10584 case ISD::SINT_TO_FP:
10585 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10586 case ISD::STRICT_FP_TO_SINT:
10587 case ISD::STRICT_FP_TO_UINT:
10588 case ISD::FP_TO_SINT:
10589 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10590 case ISD::FP_TO_SINT_SAT:
10591 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10592 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10593 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10594 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10595 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10596 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10597 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10598 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10599 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10600 Subtarget);
10601 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10602 case ISD::SHL:
10603 case ISD::SRL:
10604 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10605 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10606 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10607 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10608 case ISD::SRL_PARTS:
10609 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10610 case ISD::CTTZ:
10611 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10612 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10613 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10614 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10615 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10616 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10617 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10618 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10619 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10620 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10621 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10622 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10623 case ISD::SIGN_EXTEND:
10624 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10625 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10626 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10627 case ISD::SET_FPMODE:
10628 return LowerSET_FPMODE(Op, DAG);
10629 case ISD::RESET_FPMODE:
10630 return LowerRESET_FPMODE(Op, DAG);
10631 case ISD::MUL: return LowerMUL(Op, DAG);
10632 case ISD::SDIV:
10633 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10634 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10635 return LowerSDIV(Op, DAG, Subtarget);
10636 case ISD::UDIV:
10637 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10638 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10639 return LowerUDIV(Op, DAG, Subtarget);
10640 case ISD::UADDO_CARRY:
10641 case ISD::USUBO_CARRY:
10642 return LowerUADDSUBO_CARRY(Op, DAG);
10643 case ISD::SADDO:
10644 case ISD::SSUBO:
10645 return LowerSignedALUO(Op, DAG);
10646 case ISD::UADDO:
10647 case ISD::USUBO:
10648 return LowerUnsignedALUO(Op, DAG);
10649 case ISD::SADDSAT:
10650 case ISD::SSUBSAT:
10651 case ISD::UADDSAT:
10652 case ISD::USUBSAT:
10653 return LowerADDSUBSAT(Op, DAG, Subtarget);
10654 case ISD::LOAD:
10655 return LowerPredicateLoad(Op, DAG);
10656 case ISD::STORE:
10657 return LowerSTORE(Op, DAG, Subtarget);
10658 case ISD::MLOAD:
10659 return LowerMLOAD(Op, DAG);
10660 case ISD::VECREDUCE_MUL:
10661 case ISD::VECREDUCE_AND:
10662 case ISD::VECREDUCE_OR:
10663 case ISD::VECREDUCE_XOR:
10664 return LowerVecReduce(Op, DAG, Subtarget);
10665 case ISD::VECREDUCE_FADD:
10666 case ISD::VECREDUCE_FMUL:
10667 case ISD::VECREDUCE_FMIN:
10668 case ISD::VECREDUCE_FMAX:
10669 return LowerVecReduceF(Op, DAG, Subtarget);
10670 case ISD::VECREDUCE_UMIN:
10671 case ISD::VECREDUCE_UMAX:
10672 case ISD::VECREDUCE_SMIN:
10673 case ISD::VECREDUCE_SMAX:
10674 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10675 case ISD::ATOMIC_LOAD:
10676 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10677 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10678 case ISD::SDIVREM:
10679 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10680 case ISD::DYNAMIC_STACKALLOC:
10681 if (Subtarget->isTargetWindows())
10682 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10683 llvm_unreachable("Don't know how to custom lower this!");
10684 case ISD::STRICT_FP_ROUND:
10685 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10686 case ISD::STRICT_FP_EXTEND:
10687 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10688 case ISD::STRICT_FSETCC:
10689 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10690 case ISD::SPONENTRY:
10691 return LowerSPONENTRY(Op, DAG);
10692 case ARMISD::WIN__DBZCHK: return SDValue();
10693 }
10694}
10695
10696 static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10697 SelectionDAG &DAG) {
10698 unsigned IntNo = N->getConstantOperandVal(0);
10699 unsigned Opc = 0;
10700 if (IntNo == Intrinsic::arm_smlald)
10701 Opc = ARMISD::SMLALD;
10702 else if (IntNo == Intrinsic::arm_smlaldx)
10703 Opc = ARMISD::SMLALDX;
10704 else if (IntNo == Intrinsic::arm_smlsld)
10705 Opc = ARMISD::SMLSLD;
10706 else if (IntNo == Intrinsic::arm_smlsldx)
10707 Opc = ARMISD::SMLSLDX;
10708 else
10709 return;
10710
10711 SDLoc dl(N);
10712 SDValue Lo, Hi;
10713 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10714
10715 SDValue LongMul = DAG.getNode(Opc, dl,
10716 DAG.getVTList(MVT::i32, MVT::i32),
10717 N->getOperand(1), N->getOperand(2),
10718 Lo, Hi);
10719 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10720 LongMul.getValue(0), LongMul.getValue(1)));
10721}
10722
10723/// ReplaceNodeResults - Replace the results of node with an illegal result
10724/// type with new values built out of custom code.
10725 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10726 SmallVectorImpl<SDValue> &Results,
10727 SelectionDAG &DAG) const {
10728 SDValue Res;
10729 switch (N->getOpcode()) {
10730 default:
10731 llvm_unreachable("Don't know how to custom expand this!");
10732 case ISD::READ_REGISTER:
10733 ExpandREAD_REGISTER(N, Results, DAG);
10734 break;
10735 case ISD::BITCAST:
10736 Res = ExpandBITCAST(N, DAG, Subtarget);
10737 break;
10738 case ISD::SRL:
10739 case ISD::SRA:
10740 case ISD::SHL:
10741 Res = Expand64BitShift(N, DAG, Subtarget);
10742 break;
10743 case ISD::SREM:
10744 case ISD::UREM:
10745 Res = LowerREM(N, DAG);
10746 break;
10747 case ISD::SDIVREM:
10748 case ISD::UDIVREM:
10749 Res = LowerDivRem(SDValue(N, 0), DAG);
10750 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10751 Results.push_back(Res.getValue(0));
10752 Results.push_back(Res.getValue(1));
10753 return;
10754 case ISD::SADDSAT:
10755 case ISD::SSUBSAT:
10756 case ISD::UADDSAT:
10757 case ISD::USUBSAT:
10758 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10759 break;
10760 case ISD::READCYCLECOUNTER:
10761 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10762 return;
10763 case ISD::UDIV:
10764 case ISD::SDIV:
10765 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10766 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10767 Results);
10768 case ISD::ATOMIC_CMP_SWAP:
10769 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10770 return;
10771 case ISD::INTRINSIC_WO_CHAIN:
10772 return ReplaceLongIntrinsic(N, Results, DAG);
10773 case ISD::LOAD:
10774 LowerLOAD(N, Results, DAG);
10775 break;
10776 case ISD::TRUNCATE:
10777 Res = LowerTruncate(N, DAG, Subtarget);
10778 break;
10779 case ISD::SIGN_EXTEND:
10780 case ISD::ZERO_EXTEND:
10781 Res = LowerVectorExtend(N, DAG, Subtarget);
10782 break;
10783 case ISD::FP_TO_SINT_SAT:
10784 case ISD::FP_TO_UINT_SAT:
10785 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10786 break;
10787 }
10788 if (Res.getNode())
10789 Results.push_back(Res);
10790}
10791
10792//===----------------------------------------------------------------------===//
10793// ARM Scheduler Hooks
10794//===----------------------------------------------------------------------===//
10795
10796/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10797/// registers the function context.
10798void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10799 MachineBasicBlock *MBB,
10800 MachineBasicBlock *DispatchBB,
10801 int FI) const {
10802 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10803 "ROPI/RWPI not currently supported with SjLj");
10804 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10805 DebugLoc dl = MI.getDebugLoc();
10806 MachineFunction *MF = MBB->getParent();
10807 MachineRegisterInfo *MRI = &MF->getRegInfo();
10808 MachineConstantPool *MCP = MF->getConstantPool();
10809 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10810 const Function &F = MF->getFunction();
10811
10812 bool isThumb = Subtarget->isThumb();
10813 bool isThumb2 = Subtarget->isThumb2();
10814
10815 unsigned PCLabelId = AFI->createPICLabelUId();
10816 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10817 ARMConstantPoolValue *CPV =
10818 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10819 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10820
10821 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10822 : &ARM::GPRRegClass;
10823
10824 // Grab constant pool and fixed stack memory operands.
10825 MachineMemOperand *CPMMO =
10826 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10827 MachineMemOperand::MOLoad, 4, Align(4));
10828
10829 MachineMemOperand *FIMMOSt =
10830 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10831 MachineMemOperand::MOStore, 4, Align(4));
10832
10833 // Load the address of the dispatch MBB into the jump buffer.
10834 if (isThumb2) {
10835 // Incoming value: jbuf
10836 // ldr.n r5, LCPI1_1
10837 // orr r5, r5, #1
10838 // add r5, pc
10839 // str r5, [$jbuf, #+4] ; &jbuf[1]
10840 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10841 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10843 .addMemOperand(CPMMO)
10845 // Set the low bit because of thumb mode.
10846 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10847 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10848 .addReg(NewVReg1, RegState::Kill)
10849 .addImm(0x01)
10851 .add(condCodeOp());
10852 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10853 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10854 .addReg(NewVReg2, RegState::Kill)
10855 .addImm(PCLabelId);
10856 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10857 .addReg(NewVReg3, RegState::Kill)
10858 .addFrameIndex(FI)
10859 .addImm(36) // &jbuf[1] :: pc
10860 .addMemOperand(FIMMOSt)
10862 } else if (isThumb) {
10863 // Incoming value: jbuf
10864 // ldr.n r1, LCPI1_4
10865 // add r1, pc
10866 // mov r2, #1
10867 // orrs r1, r2
10868 // add r2, $jbuf, #+4 ; &jbuf[1]
10869 // str r1, [r2]
10870 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10871 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10873 .addMemOperand(CPMMO)
10875 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10876 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10877 .addReg(NewVReg1, RegState::Kill)
10878 .addImm(PCLabelId);
10879 // Set the low bit because of thumb mode.
10880 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10881 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10882 .addReg(ARM::CPSR, RegState::Define)
10883 .addImm(1)
10885 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10886 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10887 .addReg(ARM::CPSR, RegState::Define)
10888 .addReg(NewVReg2, RegState::Kill)
10889 .addReg(NewVReg3, RegState::Kill)
10891 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10892 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10893 .addFrameIndex(FI)
10894 .addImm(36); // &jbuf[1] :: pc
10895 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10896 .addReg(NewVReg4, RegState::Kill)
10897 .addReg(NewVReg5, RegState::Kill)
10898 .addImm(0)
10899 .addMemOperand(FIMMOSt)
10901 } else {
10902 // Incoming value: jbuf
10903 // ldr r1, LCPI1_1
10904 // add r1, pc, r1
10905 // str r1, [$jbuf, #+4] ; &jbuf[1]
10906 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10907 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10909 .addImm(0)
10910 .addMemOperand(CPMMO)
10912 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10913 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10914 .addReg(NewVReg1, RegState::Kill)
10915 .addImm(PCLabelId)
10917 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10918 .addReg(NewVReg2, RegState::Kill)
10919 .addFrameIndex(FI)
10920 .addImm(36) // &jbuf[1] :: pc
10921 .addMemOperand(FIMMOSt)
10923 }
10924}
10925
10926void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10927 MachineBasicBlock *MBB) const {
10928 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10929 DebugLoc dl = MI.getDebugLoc();
10930 MachineFunction *MF = MBB->getParent();
10931 MachineRegisterInfo *MRI = &MF->getRegInfo();
10932 MachineFrameInfo &MFI = MF->getFrameInfo();
10933 int FI = MFI.getFunctionContextIndex();
10934
10935 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10936 : &ARM::GPRnopcRegClass;
10937
10938 // Get a mapping of the call site numbers to all of the landing pads they're
10939 // associated with.
10940 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
10941 unsigned MaxCSNum = 0;
10942 for (MachineBasicBlock &BB : *MF) {
10943 if (!BB.isEHPad())
10944 continue;
10945
10946 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10947 // pad.
10948 for (MachineInstr &II : BB) {
10949 if (!II.isEHLabel())
10950 continue;
10951
10952 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10953 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10954
10955 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10956 for (unsigned Idx : CallSiteIdxs) {
10957 CallSiteNumToLPad[Idx].push_back(&BB);
10958 MaxCSNum = std::max(MaxCSNum, Idx);
10959 }
10960 break;
10961 }
10962 }
10963
10964 // Get an ordered list of the machine basic blocks for the jump table.
10965 std::vector<MachineBasicBlock*> LPadList;
10966 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
10967 LPadList.reserve(CallSiteNumToLPad.size());
10968 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10969 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10970 for (MachineBasicBlock *MBB : MBBList) {
10971 LPadList.push_back(MBB);
10972 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10973 }
10974 }
10975
10976 assert(!LPadList.empty() &&
10977 "No landing pad destinations for the dispatch jump table!");
10978
10979 // Create the jump table and associated information.
10980 MachineJumpTableInfo *JTI =
10981 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10982 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10983
10984 // Create the MBBs for the dispatch code.
10985
10986 // Shove the dispatch's address into the return slot in the function context.
10987 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10988 DispatchBB->setIsEHPad();
10989
10990 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10991 unsigned trap_opcode;
10992 if (Subtarget->isThumb())
10993 trap_opcode = ARM::tTRAP;
10994 else
10995 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10996
10997 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10998 DispatchBB->addSuccessor(TrapBB);
10999
11000 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11001 DispatchBB->addSuccessor(DispContBB);
11002
11003 // Insert the new MBBs into the function.
11004 MF->insert(MF->end(), DispatchBB);
11005 MF->insert(MF->end(), DispContBB);
11006 MF->insert(MF->end(), TrapBB);
11007
11008 // Insert code into the entry block that creates and registers the function
11009 // context.
11010 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
11011
11012 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11013 MachinePointerInfo::getFixedStack(*MF, FI),
11014 MachineMemOperand::MOLoad, 4, Align(4));
11015
11016 MachineInstrBuilder MIB;
11017 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11018
11019 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
11020 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11021
11022 // Add a register mask with no preserved registers. This results in all
11023 // registers being marked as clobbered. This can't work if the dispatch block
11024 // is in a Thumb1 function and is linked with ARM code which uses the FP
11025 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11026 MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
11027
11028 bool IsPositionIndependent = isPositionIndependent();
11029 unsigned NumLPads = LPadList.size();
11030 if (Subtarget->isThumb2()) {
11031 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11032 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11033 .addFrameIndex(FI)
11034 .addImm(4)
11035 .addMemOperand(FIMMOLd)
11037
11038 if (NumLPads < 256) {
11039 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11040 .addReg(NewVReg1)
11041 .addImm(LPadList.size())
11043 } else {
11044 Register VReg1 = MRI->createVirtualRegister(TRC);
11045 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11046 .addImm(NumLPads & 0xFFFF)
11048
11049 unsigned VReg2 = VReg1;
11050 if ((NumLPads & 0xFFFF0000) != 0) {
11051 VReg2 = MRI->createVirtualRegister(TRC);
11052 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11053 .addReg(VReg1)
11054 .addImm(NumLPads >> 16)
11056 }
11057
11058 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11059 .addReg(NewVReg1)
11060 .addReg(VReg2)
11062 }
11063
11064 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11065 .addMBB(TrapBB)
11067 .addReg(ARM::CPSR);
11068
11069 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11070 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11071 .addJumpTableIndex(MJTI)
11073
11074 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11075 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11076 .addReg(NewVReg3, RegState::Kill)
11077 .addReg(NewVReg1)
11080 .add(condCodeOp());
11081
11082 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11083 .addReg(NewVReg4, RegState::Kill)
11084 .addReg(NewVReg1)
11085 .addJumpTableIndex(MJTI);
11086 } else if (Subtarget->isThumb()) {
11087 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11088 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11089 .addFrameIndex(FI)
11090 .addImm(1)
11091 .addMemOperand(FIMMOLd)
11093
11094 if (NumLPads < 256) {
11095 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11096 .addReg(NewVReg1)
11097 .addImm(NumLPads)
11099 } else {
11100 MachineConstantPool *ConstantPool = MF->getConstantPool();
11101 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11102 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11103
11104 // MachineConstantPool wants an explicit alignment.
11105 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11106 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11107
11108 Register VReg1 = MRI->createVirtualRegister(TRC);
11109 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11110 .addReg(VReg1, RegState::Define)
11113 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11114 .addReg(NewVReg1)
11115 .addReg(VReg1)
11117 }
11118
11119 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11120 .addMBB(TrapBB)
11122 .addReg(ARM::CPSR);
11123
11124 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11125 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11126 .addReg(ARM::CPSR, RegState::Define)
11127 .addReg(NewVReg1)
11128 .addImm(2)
11130
11131 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11132 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11133 .addJumpTableIndex(MJTI)
11135
11136 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11137 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11138 .addReg(ARM::CPSR, RegState::Define)
11139 .addReg(NewVReg2, RegState::Kill)
11140 .addReg(NewVReg3)
11142
11143 MachineMemOperand *JTMMOLd =
11144 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11146
11147 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11148 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11149 .addReg(NewVReg4, RegState::Kill)
11150 .addImm(0)
11151 .addMemOperand(JTMMOLd)
11153
11154 unsigned NewVReg6 = NewVReg5;
11155 if (IsPositionIndependent) {
11156 NewVReg6 = MRI->createVirtualRegister(TRC);
11157 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11158 .addReg(ARM::CPSR, RegState::Define)
11159 .addReg(NewVReg5, RegState::Kill)
11160 .addReg(NewVReg3)
11162 }
11163
11164 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11165 .addReg(NewVReg6, RegState::Kill)
11166 .addJumpTableIndex(MJTI);
11167 } else {
11168 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11169 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11170 .addFrameIndex(FI)
11171 .addImm(4)
11172 .addMemOperand(FIMMOLd)
11174
11175 if (NumLPads < 256) {
11176 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11177 .addReg(NewVReg1)
11178 .addImm(NumLPads)
11180 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11181 Register VReg1 = MRI->createVirtualRegister(TRC);
11182 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11183 .addImm(NumLPads & 0xFFFF)
11185
11186 unsigned VReg2 = VReg1;
11187 if ((NumLPads & 0xFFFF0000) != 0) {
11188 VReg2 = MRI->createVirtualRegister(TRC);
11189 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11190 .addReg(VReg1)
11191 .addImm(NumLPads >> 16)
11193 }
11194
11195 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11196 .addReg(NewVReg1)
11197 .addReg(VReg2)
11199 } else {
11200 MachineConstantPool *ConstantPool = MF->getConstantPool();
11201 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11202 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11203
11204 // MachineConstantPool wants an explicit alignment.
11205 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11206 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11207
11208 Register VReg1 = MRI->createVirtualRegister(TRC);
11209 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11210 .addReg(VReg1, RegState::Define)
11212 .addImm(0)
11214 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11215 .addReg(NewVReg1)
11216 .addReg(VReg1, RegState::Kill)
11218 }
11219
11220 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11221 .addMBB(TrapBB)
11223 .addReg(ARM::CPSR);
11224
11225 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11226 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11227 .addReg(NewVReg1)
11230 .add(condCodeOp());
11231 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11232 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11233 .addJumpTableIndex(MJTI)
11235
11236 MachineMemOperand *JTMMOLd =
11237 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11239 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11240 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11241 .addReg(NewVReg3, RegState::Kill)
11242 .addReg(NewVReg4)
11243 .addImm(0)
11244 .addMemOperand(JTMMOLd)
11246
11247 if (IsPositionIndependent) {
11248 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11249 .addReg(NewVReg5, RegState::Kill)
11250 .addReg(NewVReg4)
11251 .addJumpTableIndex(MJTI);
11252 } else {
11253 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11254 .addReg(NewVReg5, RegState::Kill)
11255 .addJumpTableIndex(MJTI);
11256 }
11257 }
11258
11259 // Add the jump table entries as successors to the MBB.
11261 for (MachineBasicBlock *CurMBB : LPadList) {
11262 if (SeenMBBs.insert(CurMBB).second)
11263 DispContBB->addSuccessor(CurMBB);
11264 }
11265
11266 // N.B. the order the invoke BBs are processed in doesn't matter here.
11267 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11268 SmallVector<MachineBasicBlock *, 64> MBBLPads;
11269 for (MachineBasicBlock *BB : InvokeBBs) {
11270
11271 // Remove the landing pad successor from the invoke block and replace it
11272 // with the new dispatch block.
11273 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11274 while (!Successors.empty()) {
11275 MachineBasicBlock *SMBB = Successors.pop_back_val();
11276 if (SMBB->isEHPad()) {
11277 BB->removeSuccessor(SMBB);
11278 MBBLPads.push_back(SMBB);
11279 }
11280 }
11281
11282 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11283 BB->normalizeSuccProbs();
11284
11285 // Find the invoke call and mark all of the callee-saved registers as
11286 // 'implicit defined' so that they're spilled. This prevents code from
11287 // moving instructions to before the EH block, where they will never be
11288 // executed.
11289 for (MachineBasicBlock::reverse_iterator
11290 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11291 if (!II->isCall()) continue;
11292
11295 OI = II->operands_begin(), OE = II->operands_end();
11296 OI != OE; ++OI) {
11297 if (!OI->isReg()) continue;
11298 DefRegs[OI->getReg()] = true;
11299 }
11300
11301 MachineInstrBuilder MIB(*MF, &*II);
11302
11303 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11304 unsigned Reg = SavedRegs[i];
11305 if (Subtarget->isThumb2() &&
11306 !ARM::tGPRRegClass.contains(Reg) &&
11307 !ARM::hGPRRegClass.contains(Reg))
11308 continue;
11309 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11310 continue;
11311 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11312 continue;
11313 if (!DefRegs[Reg])
11314 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
11315 }
11316
11317 break;
11318 }
11319 }
11320
11321 // Mark all former landing pads as non-landing pads. The dispatch is the only
11322 // landing pad now.
11323 for (MachineBasicBlock *MBBLPad : MBBLPads)
11324 MBBLPad->setIsEHPad(false);
11325
11326 // The instruction is gone now.
11327 MI.eraseFromParent();
11328}
11329
11330static
11331 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11332 for (MachineBasicBlock *S : MBB->successors())
11333 if (S != Succ)
11334 return S;
11335 llvm_unreachable("Expecting a BB with two successors!");
11336}
11337
11338 /// Return the load opcode for a given load size. If load size >= 8,
11339 /// a NEON opcode will be returned.
11340static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11341 if (LdSize >= 8)
11342 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11343 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11344 if (IsThumb1)
11345 return LdSize == 4 ? ARM::tLDRi
11346 : LdSize == 2 ? ARM::tLDRHi
11347 : LdSize == 1 ? ARM::tLDRBi : 0;
11348 if (IsThumb2)
11349 return LdSize == 4 ? ARM::t2LDR_POST
11350 : LdSize == 2 ? ARM::t2LDRH_POST
11351 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11352 return LdSize == 4 ? ARM::LDR_POST_IMM
11353 : LdSize == 2 ? ARM::LDRH_POST
11354 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11355}
11356
11357 /// Return the store opcode for a given store size. If store size >= 8,
11358 /// a NEON opcode will be returned.
11359static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11360 if (StSize >= 8)
11361 return StSize == 16 ? ARM::VST1q32wb_fixed
11362 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11363 if (IsThumb1)
11364 return StSize == 4 ? ARM::tSTRi
11365 : StSize == 2 ? ARM::tSTRHi
11366 : StSize == 1 ? ARM::tSTRBi : 0;
11367 if (IsThumb2)
11368 return StSize == 4 ? ARM::t2STR_POST
11369 : StSize == 2 ? ARM::t2STRH_POST
11370 : StSize == 1 ? ARM::t2STRB_POST : 0;
11371 return StSize == 4 ? ARM::STR_POST_IMM
11372 : StSize == 2 ? ARM::STRH_POST
11373 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11374}
11375
11376/// Emit a post-increment load operation with given size. The instructions
11377/// will be added to BB at Pos.
11378 static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11379 const TargetInstrInfo *TII, const DebugLoc &dl,
11380 unsigned LdSize, unsigned Data, unsigned AddrIn,
11381 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11382 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11383 assert(LdOpc != 0 && "Should have a load opcode");
11384 if (LdSize >= 8) {
11385 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11386 .addReg(AddrOut, RegState::Define)
11387 .addReg(AddrIn)
11388 .addImm(0)
11390 } else if (IsThumb1) {
11391 // load + update AddrIn
11392 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11393 .addReg(AddrIn)
11394 .addImm(0)
11396 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11397 .add(t1CondCodeOp())
11398 .addReg(AddrIn)
11399 .addImm(LdSize)
11401 } else if (IsThumb2) {
11402 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11403 .addReg(AddrOut, RegState::Define)
11404 .addReg(AddrIn)
11405 .addImm(LdSize)
11407 } else { // arm
11408 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11409 .addReg(AddrOut, RegState::Define)
11410 .addReg(AddrIn)
11411 .addReg(0)
11412 .addImm(LdSize)
11414 }
11415}
11416
11417/// Emit a post-increment store operation with given size. The instructions
11418/// will be added to BB at Pos.
11419 static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11420 const TargetInstrInfo *TII, const DebugLoc &dl,
11421 unsigned StSize, unsigned Data, unsigned AddrIn,
11422 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11423 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11424 assert(StOpc != 0 && "Should have a store opcode");
11425 if (StSize >= 8) {
11426 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11427 .addReg(AddrIn)
11428 .addImm(0)
11429 .addReg(Data)
11431 } else if (IsThumb1) {
11432 // store + update AddrIn
11433 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11434 .addReg(Data)
11435 .addReg(AddrIn)
11436 .addImm(0)
11438 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11439 .add(t1CondCodeOp())
11440 .addReg(AddrIn)
11441 .addImm(StSize)
11443 } else if (IsThumb2) {
11444 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11445 .addReg(Data)
11446 .addReg(AddrIn)
11447 .addImm(StSize)
11449 } else { // arm
11450 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11451 .addReg(Data)
11452 .addReg(AddrIn)
11453 .addReg(0)
11454 .addImm(StSize)
11456 }
11457}
11458
11459 MachineBasicBlock *
11460 ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11461 MachineBasicBlock *BB) const {
11462 // This pseudo instruction has 4 operands: dst, src, size, alignment.
11463 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11464 // Otherwise, we will generate unrolled scalar copies.
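// Worked example (illustrative): a 10-byte copy with 4-byte alignment below
// the inline threshold becomes two LDR/STR post-increment pairs (UnitSize = 4,
// LoopSize = 8) followed by two LDRB/STRB pairs for the BytesLeft = 2 tail.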
11465 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11466 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11468
11469 Register dest = MI.getOperand(0).getReg();
11470 Register src = MI.getOperand(1).getReg();
11471 unsigned SizeVal = MI.getOperand(2).getImm();
11472 unsigned Alignment = MI.getOperand(3).getImm();
11473 DebugLoc dl = MI.getDebugLoc();
11474
11475 MachineFunction *MF = BB->getParent();
11476 MachineRegisterInfo &MRI = MF->getRegInfo();
11477 unsigned UnitSize = 0;
11478 const TargetRegisterClass *TRC = nullptr;
11479 const TargetRegisterClass *VecTRC = nullptr;
11480
11481 bool IsThumb1 = Subtarget->isThumb1Only();
11482 bool IsThumb2 = Subtarget->isThumb2();
11483 bool IsThumb = Subtarget->isThumb();
11484
11485 if (Alignment & 1) {
11486 UnitSize = 1;
11487 } else if (Alignment & 2) {
11488 UnitSize = 2;
11489 } else {
11490 // Check whether we can use NEON instructions.
11491 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11492 Subtarget->hasNEON()) {
11493 if ((Alignment % 16 == 0) && SizeVal >= 16)
11494 UnitSize = 16;
11495 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11496 UnitSize = 8;
11497 }
11498 // Can't use NEON instructions.
11499 if (UnitSize == 0)
11500 UnitSize = 4;
11501 }
11502
11503 // Select the correct opcode and register class for unit size load/store
11504 bool IsNeon = UnitSize >= 8;
11505 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11506 if (IsNeon)
11507 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11508 : UnitSize == 8 ? &ARM::DPRRegClass
11509 : nullptr;
11510
11511 unsigned BytesLeft = SizeVal % UnitSize;
11512 unsigned LoopSize = SizeVal - BytesLeft;
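// For example, a 37-byte copy with 16-byte alignment and NEON usable picks UnitSize = 16, giving LoopSize = 32 and BytesLeft = 5.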
11513
11514 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11515 // Use LDR and STR to copy.
11516 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11517 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11518 unsigned srcIn = src;
11519 unsigned destIn = dest;
11520 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11521 Register srcOut = MRI.createVirtualRegister(TRC);
11522 Register destOut = MRI.createVirtualRegister(TRC);
11523 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11524 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11525 IsThumb1, IsThumb2);
11526 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11527 IsThumb1, IsThumb2);
11528 srcIn = srcOut;
11529 destIn = destOut;
11530 }
11531
11532 // Handle the leftover bytes with LDRB and STRB.
11533 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11534 // [destOut] = STRB_POST(scratch, destIn, 1)
11535 for (unsigned i = 0; i < BytesLeft; i++) {
11536 Register srcOut = MRI.createVirtualRegister(TRC);
11537 Register destOut = MRI.createVirtualRegister(TRC);
11538 Register scratch = MRI.createVirtualRegister(TRC);
11539 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11540 IsThumb1, IsThumb2);
11541 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11542 IsThumb1, IsThumb2);
11543 srcIn = srcOut;
11544 destIn = destOut;
11545 }
11546 MI.eraseFromParent(); // The instruction is gone now.
11547 return BB;
11548 }
11549
11550 // Expand the pseudo op to a loop.
11551 // thisMBB:
11552 // ...
11553 // movw varEnd, # --> with thumb2
11554 // movt varEnd, #
11555 // ldrcp varEnd, idx --> without thumb2
11556 // fallthrough --> loopMBB
11557 // loopMBB:
11558 // PHI varPhi, varEnd, varLoop
11559 // PHI srcPhi, src, srcLoop
11560 // PHI destPhi, dst, destLoop
11561 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11562 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11563 // subs varLoop, varPhi, #UnitSize
11564 // bne loopMBB
11565 // fallthrough --> exitMBB
11566 // exitMBB:
11567 // epilogue to handle left-over bytes
11568 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11569 // [destOut] = STRB_POST(scratch, destLoop, 1)
11570 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11571 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11572 MF->insert(It, loopMBB);
11573 MF->insert(It, exitMBB);
11574
11575 // Set the call frame size on entry to the new basic blocks.
11576 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11577 loopMBB->setCallFrameSize(CallFrameSize);
11578 exitMBB->setCallFrameSize(CallFrameSize);
11579
11580 // Transfer the remainder of BB and its successor edges to exitMBB.
11581 exitMBB->splice(exitMBB->begin(), BB,
11582 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11583 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11584
11585 // Load an immediate to varEnd.
11586 Register varEnd = MRI.createVirtualRegister(TRC);
11587 if (Subtarget->useMovt()) {
11588 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11589 varEnd)
11590 .addImm(LoopSize);
11591 } else if (Subtarget->genExecuteOnly()) {
11592 assert(IsThumb && "Non-thumb expected to have used movt");
11593 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11594 } else {
11595 MachineConstantPool *ConstantPool = MF->getConstantPool();
11596 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11597 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11598
11599 // MachineConstantPool wants an explicit alignment.
11600 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11601 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11602 MachineMemOperand *CPMMO =
11605
11606 if (IsThumb)
11607 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11608 .addReg(varEnd, RegState::Define)
11611 .addMemOperand(CPMMO);
11612 else
11613 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11614 .addReg(varEnd, RegState::Define)
11616 .addImm(0)
11618 .addMemOperand(CPMMO);
11619 }
11620 BB->addSuccessor(loopMBB);
11621
11622 // Generate the loop body:
11623 // varPhi = PHI(varLoop, varEnd)
11624 // srcPhi = PHI(srcLoop, src)
11625 // destPhi = PHI(destLoop, dst)
11626 MachineBasicBlock *entryBB = BB;
11627 BB = loopMBB;
11628 Register varLoop = MRI.createVirtualRegister(TRC);
11629 Register varPhi = MRI.createVirtualRegister(TRC);
11630 Register srcLoop = MRI.createVirtualRegister(TRC);
11631 Register srcPhi = MRI.createVirtualRegister(TRC);
11632 Register destLoop = MRI.createVirtualRegister(TRC);
11633 Register destPhi = MRI.createVirtualRegister(TRC);
11634
11635 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11636 .addReg(varLoop).addMBB(loopMBB)
11637 .addReg(varEnd).addMBB(entryBB);
11638 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11639 .addReg(srcLoop).addMBB(loopMBB)
11640 .addReg(src).addMBB(entryBB);
11641 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11642 .addReg(destLoop).addMBB(loopMBB)
11643 .addReg(dest).addMBB(entryBB);
11644
11645 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11646 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11647 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11648 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11649 IsThumb1, IsThumb2);
11650 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11651 IsThumb1, IsThumb2);
11652
11653 // Decrement loop variable by UnitSize.
11654 if (IsThumb1) {
11655 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11656 .add(t1CondCodeOp())
11657 .addReg(varPhi)
11658 .addImm(UnitSize)
11660 } else {
11661 MachineInstrBuilder MIB =
11662 BuildMI(*BB, BB->end(), dl,
11663 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11664 MIB.addReg(varPhi)
11665 .addImm(UnitSize)
11667 .add(condCodeOp());
11668 MIB->getOperand(5).setReg(ARM::CPSR);
11669 MIB->getOperand(5).setIsDef(true);
11670 }
11671 BuildMI(*BB, BB->end(), dl,
11672 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11673 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11674
11675 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11676 BB->addSuccessor(loopMBB);
11677 BB->addSuccessor(exitMBB);
11678
11679 // Add epilogue to handle BytesLeft.
11680 BB = exitMBB;
11681 auto StartOfExit = exitMBB->begin();
11682
11683 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11684 // [destOut] = STRB_POST(scratch, destLoop, 1)
11685 unsigned srcIn = srcLoop;
11686 unsigned destIn = destLoop;
11687 for (unsigned i = 0; i < BytesLeft; i++) {
11688 Register srcOut = MRI.createVirtualRegister(TRC);
11689 Register destOut = MRI.createVirtualRegister(TRC);
11690 Register scratch = MRI.createVirtualRegister(TRC);
11691 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11692 IsThumb1, IsThumb2);
11693 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11694 IsThumb1, IsThumb2);
11695 srcIn = srcOut;
11696 destIn = destOut;
11697 }
11698
11699 MI.eraseFromParent(); // The instruction is gone now.
11700 return BB;
11701}
11702
11703 MachineBasicBlock *
11704 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11705 MachineBasicBlock *MBB) const {
11706 const TargetMachine &TM = getTargetMachine();
11707 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11708 DebugLoc DL = MI.getDebugLoc();
11709
11710 assert(Subtarget->isTargetWindows() &&
11711 "__chkstk is only supported on Windows");
11712 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11713
11714 // __chkstk takes the number of words to allocate on the stack in R4, and
11715 // returns the stack adjustment in number of bytes in R4. This will not
11716 // clobber any other registers (other than the obvious lr).
11717 //
11718 // Although, technically, IP should be considered a register which may be
11719 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11720 // thumb-2 environment, so there is no interworking required. As a result, we
11721 // do not expect a veneer to be emitted by the linker, clobbering IP.
11722 //
11723 // Each module receives its own copy of __chkstk, so no import thunk is
11724 // required, again, ensuring that IP is not clobbered.
11725 //
11726 // Finally, although some linkers may theoretically provide a trampoline for
11727 // out of range calls (which is quite common due to a 32M range limitation of
11728 // branches for Thumb), we can generate the long-call version via
11729 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11730 // IP.
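// For example, when 4096 bytes are to be allocated, R4 holds 1024 (the word count) on entry to the call and 4096 (the byte adjustment) on return, which the trailing t2SUBrr subtracts from SP.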
11731
11732 switch (TM.getCodeModel()) {
11733 case CodeModel::Tiny:
11734 llvm_unreachable("Tiny code model not available on ARM.");
11735 case CodeModel::Small:
11736 case CodeModel::Medium:
11737 case CodeModel::Kernel:
11738 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11740 .addExternalSymbol("__chkstk")
11743 .addReg(ARM::R12,
11745 .addReg(ARM::CPSR,
11747 break;
11748 case CodeModel::Large: {
11749 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11750 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11751
11752 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11753 .addExternalSymbol("__chkstk");
11756 .addReg(Reg, RegState::Kill)
11759 .addReg(ARM::R12,
11761 .addReg(ARM::CPSR,
11763 break;
11764 }
11765 }
11766
11767 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11768 .addReg(ARM::SP, RegState::Kill)
11769 .addReg(ARM::R4, RegState::Kill)
11772 .add(condCodeOp());
11773
11774 MI.eraseFromParent();
11775 return MBB;
11776}
11777
11778 MachineBasicBlock *
11779 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11780 MachineBasicBlock *MBB) const {
11781 DebugLoc DL = MI.getDebugLoc();
11782 MachineFunction *MF = MBB->getParent();
11783 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11784
11785 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11786 MF->insert(++MBB->getIterator(), ContBB);
11787 ContBB->splice(ContBB->begin(), MBB,
11788 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11789 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11790 MBB->addSuccessor(ContBB);
11791
11792 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11793 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11794 MF->push_back(TrapBB);
11795 MBB->addSuccessor(TrapBB);
11796
11797 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11798 .addReg(MI.getOperand(0).getReg())
11799 .addImm(0)
11801 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11802 .addMBB(TrapBB)
11804 .addReg(ARM::CPSR);
11805
11806 MI.eraseFromParent();
11807 return ContBB;
11808}
11809
11810// The CPSR operand of SelectItr might be missing a kill marker
11811// because there were multiple uses of CPSR, and ISel didn't know
11812// which to mark. Figure out whether SelectItr should have had a
11813// kill marker, and set it if it should. Returns the correct kill
11814// marker value.
11815 static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11816 MachineBasicBlock* BB,
11817 const TargetRegisterInfo* TRI) {
11818 // Scan forward through BB for a use/def of CPSR.
11819 MachineBasicBlock::iterator miI(std::next(SelectItr));
11820 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11821 const MachineInstr& mi = *miI;
11822 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11823 return false;
11824 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11825 break; // Should have kill-flag - update below.
11826 }
11827
11828 // If we hit the end of the block, check whether CPSR is live into a
11829 // successor.
11830 if (miI == BB->end()) {
11831 for (MachineBasicBlock *Succ : BB->successors())
11832 if (Succ->isLiveIn(ARM::CPSR))
11833 return false;
11834 }
11835
11836 // We found a def, or hit the end of the basic block and CPSR wasn't live
11837 // out. SelectMI should have a kill flag on CPSR.
11838 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11839 return true;
11840}
11841
11842/// Adds logic in loop entry MBB to calculate loop iteration count and adds
11843/// t2WhileLoopSetup and t2WhileLoopStart to generate WLS loop
11844 static Register genTPEntry(MachineBasicBlock *TpEntry,
11845 MachineBasicBlock *TpLoopBody,
11846 MachineBasicBlock *TpExit, Register OpSizeReg,
11847 const TargetInstrInfo *TII, DebugLoc Dl,
11848 MachineRegisterInfo &MRI) {
11849 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
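// e.g. n = 100 gives (100 + 15) >> 4 = 7 iterations, each covering up to 16 bytes.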
11850 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11851 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11852 .addUse(OpSizeReg)
11853 .addImm(15)
11855 .addReg(0);
11856
11857 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11858 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11859 .addUse(AddDestReg, RegState::Kill)
11860 .addImm(4)
11862 .addReg(0);
11863
11864 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11865 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11866 .addUse(LsrDestReg, RegState::Kill);
11867
11868 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11869 .addUse(TotalIterationsReg)
11870 .addMBB(TpExit);
11871
11872 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11873 .addMBB(TpLoopBody)
11875
11876 return TotalIterationsReg;
11877}
11878
11879/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11880/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11881/// loops.
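/// Roughly: a VCTP8 predicate is formed from the remaining element count, a predicated 16-byte post-incrementing VLDRB/VSTRB pair is emitted (the load only for memcpy), and the loop-decrement/loop-end pseudo instructions close the loop.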
11882static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11883 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11884 const TargetInstrInfo *TII, DebugLoc Dl,
11885 MachineRegisterInfo &MRI, Register OpSrcReg,
11886 Register OpDestReg, Register ElementCountReg,
11887 Register TotalIterationsReg, bool IsMemcpy) {
11888 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11889 // array, loop iteration counter, predication counter.
11890
11891 Register SrcPhiReg, CurrSrcReg;
11892 if (IsMemcpy) {
11893 // Current position in the src array
11894 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11895 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11896 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11897 .addUse(OpSrcReg)
11898 .addMBB(TpEntry)
11899 .addUse(CurrSrcReg)
11900 .addMBB(TpLoopBody);
11901 }
11902
11903 // Current position in the dest array
11904 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11905 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11906 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11907 .addUse(OpDestReg)
11908 .addMBB(TpEntry)
11909 .addUse(CurrDestReg)
11910 .addMBB(TpLoopBody);
11911
11912 // Current loop counter
11913 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11914 Register RemainingLoopIterationsReg =
11915 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11916 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11917 .addUse(TotalIterationsReg)
11918 .addMBB(TpEntry)
11919 .addUse(RemainingLoopIterationsReg)
11920 .addMBB(TpLoopBody);
11921
11922 // Predication counter
11923 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11924 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11925 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11926 .addUse(ElementCountReg)
11927 .addMBB(TpEntry)
11928 .addUse(RemainingElementsReg)
11929 .addMBB(TpLoopBody);
11930
11931 // Pass predication counter to VCTP
11932 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11933 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11934 .addUse(PredCounterPhiReg)
11936 .addReg(0)
11937 .addReg(0);
11938
11939 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11940 .addUse(PredCounterPhiReg)
11941 .addImm(16)
11943 .addReg(0);
11944
11945 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11946 Register SrcValueReg;
11947 if (IsMemcpy) {
11948 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11949 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11950 .addDef(CurrSrcReg)
11951 .addDef(SrcValueReg)
11952 .addReg(SrcPhiReg)
11953 .addImm(16)
11955 .addUse(VccrReg)
11956 .addReg(0);
11957 } else
11958 SrcValueReg = OpSrcReg;
11959
11960 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11961 .addDef(CurrDestReg)
11962 .addUse(SrcValueReg)
11963 .addReg(DestPhiReg)
11964 .addImm(16)
11966 .addUse(VccrReg)
11967 .addReg(0);
11968
11969 // Add the pseudoInstrs for decrementing the loop counter and marking the
11970 // end:t2DoLoopDec and t2DoLoopEnd
11971 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11972 .addUse(LoopCounterPhiReg)
11973 .addImm(1);
11974
11975 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11976 .addUse(RemainingLoopIterationsReg)
11977 .addMBB(TpLoopBody);
11978
11979 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11980 .addMBB(TpExit)
11982}
11983
11984 MachineBasicBlock *
11985 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11986 MachineBasicBlock *BB) const {
11987 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11988 DebugLoc dl = MI.getDebugLoc();
11989 bool isThumb2 = Subtarget->isThumb2();
11990 switch (MI.getOpcode()) {
11991 default: {
11992 MI.print(errs());
11993 llvm_unreachable("Unexpected instr type to insert");
11994 }
11995
11996 // Thumb1 post-indexed loads are really just single-register LDMs.
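// e.g. tLDR_postidx is rewritten below as tLDMIA_UPD Rn!, {Rt}, which loads Rt from [Rn] and writes the incremented base back to Rn.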
11997 case ARM::tLDR_postidx: {
11998 MachineOperand Def(MI.getOperand(1));
11999 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12000 .add(Def) // Rn_wb
12001 .add(MI.getOperand(2)) // Rn
12002 .add(MI.getOperand(3)) // PredImm
12003 .add(MI.getOperand(4)) // PredReg
12004 .add(MI.getOperand(0)) // Rt
12005 .cloneMemRefs(MI);
12006 MI.eraseFromParent();
12007 return BB;
12008 }
12009
12010 case ARM::MVE_MEMCPYLOOPINST:
12011 case ARM::MVE_MEMSETLOOPINST: {
12012
12013 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
12014 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
12015 // the iteration count = ceil(size_in_bytes / 16) in the TP entry block and
12016 // adds the relevant instructions in the TP loop Body for generation of a
12017 // WLSTP loop.
12018
12019 // Below is relevant portion of the CFG after the transformation.
12020 // The Machine Basic Blocks are shown along with branch conditions (in
12021 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
12022 // portion of the CFG and may not necessarily be the entry/exit of the
12023 // function.
12024
12025 // (Relevant) CFG after transformation:
12026 //             TP entry MBB
12027 //                  |
12028 //       |-----------------|
12029 //   (n <= 0)           (n > 0)
12030 //       |                 |
12031 //       |        TP loop Body MBB<--|
12032 //       |                 |         |
12033 //        \                |_________|
12034 //         \              /
12035 //            TP exit MBB
12036
12037 MachineFunction *MF = BB->getParent();
12038 MachineFunctionProperties &Properties = MF->getProperties();
12039 MachineRegisterInfo &MRI = MF->getRegInfo();
12040
12041 Register OpDestReg = MI.getOperand(0).getReg();
12042 Register OpSrcReg = MI.getOperand(1).getReg();
12043 Register OpSizeReg = MI.getOperand(2).getReg();
12044
12045 // Allocate the required MBBs and add to parent function.
12046 MachineBasicBlock *TpEntry = BB;
12047 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12048 MachineBasicBlock *TpExit;
12049
12050 MF->push_back(TpLoopBody);
12051
12052 // If any instructions are present in the current block after
12053 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12054 // move the instructions into the newly created exit block. If there are no
12055 // instructions add an explicit branch to the FallThrough block and then
12056 // split.
12057 //
12058 // The split is required for two reasons:
12059 // 1) A terminator (t2WhileLoopStart) will be placed at that site.
12060 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12061 // need to be updated. splitAt() already handles this.
12062 TpExit = BB->splitAt(MI, false);
12063 if (TpExit == BB) {
12064 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12065 "block containing memcpy/memset Pseudo");
12066 TpExit = BB->getFallThrough();
12067 BuildMI(BB, dl, TII->get(ARM::t2B))
12068 .addMBB(TpExit)
12070 TpExit = BB->splitAt(MI, false);
12071 }
12072
12073 // Add logic for iteration count
12074 Register TotalIterationsReg =
12075 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12076
12077 // Add the vectorized (and predicated) loads/store instructions
12078 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12079 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12080 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12081
12082 // Required to avoid conflict with the MachineVerifier during testing.
12083 Properties.reset(MachineFunctionProperties::Property::NoPHIs);
12084
12085 // Connect the blocks
12086 TpEntry->addSuccessor(TpLoopBody);
12087 TpLoopBody->addSuccessor(TpLoopBody);
12088 TpLoopBody->addSuccessor(TpExit);
12089
12090 // Reorder for a more natural layout
12091 TpLoopBody->moveAfter(TpEntry);
12092 TpExit->moveAfter(TpLoopBody);
12093
12094 // Finally, remove the memcpy Pseudo Instruction
12095 MI.eraseFromParent();
12096
12097 // Return the exit block as it may contain other instructions requiring a
12098 // custom inserter
12099 return TpExit;
12100 }
12101
12102 // The Thumb2 pre-indexed stores have the same MI operands, they just
12103 // define them differently in the .td files from the isel patterns, so
12104 // they need pseudos.
12105 case ARM::t2STR_preidx:
12106 MI.setDesc(TII->get(ARM::t2STR_PRE));
12107 return BB;
12108 case ARM::t2STRB_preidx:
12109 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12110 return BB;
12111 case ARM::t2STRH_preidx:
12112 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12113 return BB;
12114
12115 case ARM::STRi_preidx:
12116 case ARM::STRBi_preidx: {
12117 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12118 : ARM::STRB_PRE_IMM;
12119 // Decode the offset.
12120 unsigned Offset = MI.getOperand(4).getImm();
12121 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12122 Offset = ARM_AM::getAM2Offset(Offset);
12123 if (isSub)
12124 Offset = -Offset;
12125
12126 MachineMemOperand *MMO = *MI.memoperands_begin();
12127 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12128 .add(MI.getOperand(0)) // Rn_wb
12129 .add(MI.getOperand(1)) // Rt
12130 .add(MI.getOperand(2)) // Rn
12131 .addImm(Offset) // offset (skip GPR==zero_reg)
12132 .add(MI.getOperand(5)) // pred
12133 .add(MI.getOperand(6))
12134 .addMemOperand(MMO);
12135 MI.eraseFromParent();
12136 return BB;
12137 }
12138 case ARM::STRr_preidx:
12139 case ARM::STRBr_preidx:
12140 case ARM::STRH_preidx: {
12141 unsigned NewOpc;
12142 switch (MI.getOpcode()) {
12143 default: llvm_unreachable("unexpected opcode!");
12144 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12145 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12146 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12147 }
12148 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12149 for (const MachineOperand &MO : MI.operands())
12150 MIB.add(MO);
12151 MI.eraseFromParent();
12152 return BB;
12153 }
12154
12155 case ARM::tMOVCCr_pseudo: {
12156 // To "insert" a SELECT_CC instruction, we actually have to insert the
12157 // diamond control-flow pattern. The incoming instruction knows the
12158 // destination vreg to set, the condition code register to branch on, the
12159 // true/false values to select between, and a branch opcode to use.
12160 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12161 MachineFunction::iterator It = ++BB->getIterator();
12162
12163 // thisMBB:
12164 // ...
12165 // TrueVal = ...
12166 // cmpTY ccX, r1, r2
12167 // bCC copy1MBB
12168 // fallthrough --> copy0MBB
12169 MachineBasicBlock *thisMBB = BB;
12170 MachineFunction *F = BB->getParent();
12171 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12172 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12173 F->insert(It, copy0MBB);
12174 F->insert(It, sinkMBB);
12175
12176 // Set the call frame size on entry to the new basic blocks.
12177 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12178 copy0MBB->setCallFrameSize(CallFrameSize);
12179 sinkMBB->setCallFrameSize(CallFrameSize);
12180
12181 // Check whether CPSR is live past the tMOVCCr_pseudo.
12182 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12183 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12184 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12185 copy0MBB->addLiveIn(ARM::CPSR);
12186 sinkMBB->addLiveIn(ARM::CPSR);
12187 }
12188
12189 // Transfer the remainder of BB and its successor edges to sinkMBB.
12190 sinkMBB->splice(sinkMBB->begin(), BB,
12191 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12192 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12193
12194 BB->addSuccessor(copy0MBB);
12195 BB->addSuccessor(sinkMBB);
12196
12197 BuildMI(BB, dl, TII->get(ARM::tBcc))
12198 .addMBB(sinkMBB)
12199 .addImm(MI.getOperand(3).getImm())
12200 .addReg(MI.getOperand(4).getReg());
12201
12202 // copy0MBB:
12203 // %FalseValue = ...
12204 // # fallthrough to sinkMBB
12205 BB = copy0MBB;
12206
12207 // Update machine-CFG edges
12208 BB->addSuccessor(sinkMBB);
12209
12210 // sinkMBB:
12211 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12212 // ...
12213 BB = sinkMBB;
12214 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12215 .addReg(MI.getOperand(1).getReg())
12216 .addMBB(copy0MBB)
12217 .addReg(MI.getOperand(2).getReg())
12218 .addMBB(thisMBB);
12219
12220 MI.eraseFromParent(); // The pseudo instruction is gone now.
12221 return BB;
12222 }
12223
12224 case ARM::BCCi64:
12225 case ARM::BCCZi64: {
12226 // If there is an unconditional branch to the other successor, remove it.
12227 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12228
12229 // Compare both parts that make up the double comparison separately for
12230 // equality.
12231 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12232
12233 Register LHS1 = MI.getOperand(1).getReg();
12234 Register LHS2 = MI.getOperand(2).getReg();
12235 if (RHSisZero) {
12236 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12237 .addReg(LHS1)
12238 .addImm(0)
12240 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12241 .addReg(LHS2).addImm(0)
12242 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12243 } else {
12244 Register RHS1 = MI.getOperand(3).getReg();
12245 Register RHS2 = MI.getOperand(4).getReg();
12246 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12247 .addReg(LHS1)
12248 .addReg(RHS1)
12250 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12251 .addReg(LHS2).addReg(RHS2)
12252 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12253 }
12254
12255 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12256 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12257 if (MI.getOperand(0).getImm() == ARMCC::NE)
12258 std::swap(destMBB, exitMBB);
12259
12260 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12261 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12262 if (isThumb2)
12263 BuildMI(BB, dl, TII->get(ARM::t2B))
12264 .addMBB(exitMBB)
12266 else
12267 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12268
12269 MI.eraseFromParent(); // The pseudo instruction is gone now.
12270 return BB;
12271 }
12272
12273 case ARM::Int_eh_sjlj_setjmp:
12274 case ARM::Int_eh_sjlj_setjmp_nofp:
12275 case ARM::tInt_eh_sjlj_setjmp:
12276 case ARM::t2Int_eh_sjlj_setjmp:
12277 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12278 return BB;
12279
12280 case ARM::Int_eh_sjlj_setup_dispatch:
12281 EmitSjLjDispatchBlock(MI, BB);
12282 return BB;
12283
12284 case ARM::ABS:
12285 case ARM::t2ABS: {
12286 // To insert an ABS instruction, we have to insert the
12287 // diamond control-flow pattern. The incoming instruction knows the
12288 // source vreg to test against 0, the destination vreg to set,
12289 // the condition code register to branch on, the
12290 // true/false values to select between, and a branch opcode to use.
12291 // It transforms
12292 // V1 = ABS V0
12293 // into
12294 // V2 = MOVS V0
12295 // BCC (branch to SinkBB if V0 >= 0)
12296 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12297 // SinkBB: V1 = PHI(V2, V3)
12298 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12299 MachineFunction::iterator BBI = ++BB->getIterator();
12300 MachineFunction *Fn = BB->getParent();
12301 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12302 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12303 Fn->insert(BBI, RSBBB);
12304 Fn->insert(BBI, SinkBB);
12305
12306 Register ABSSrcReg = MI.getOperand(1).getReg();
12307 Register ABSDstReg = MI.getOperand(0).getReg();
12308 bool ABSSrcKIll = MI.getOperand(1).isKill();
12309 bool isThumb2 = Subtarget->isThumb2();
12310 MachineRegisterInfo &MRI = Fn->getRegInfo();
12311 // In Thumb mode S must not be specified if source register is the SP or
12312 // PC and if destination register is the SP, so restrict register class
12313 Register NewRsbDstReg = MRI.createVirtualRegister(
12314 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12315
12316 // Transfer the remainder of BB and its successor edges to sinkMBB.
12317 SinkBB->splice(SinkBB->begin(), BB,
12318 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12319 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12320
12321 BB->addSuccessor(RSBBB);
12322 BB->addSuccessor(SinkBB);
12323
12324 // fall through to SinkMBB
12325 RSBBB->addSuccessor(SinkBB);
12326
12327 // insert a cmp at the end of BB
12328 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12329 .addReg(ABSSrcReg)
12330 .addImm(0)
12332
12333 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12334 BuildMI(BB, dl,
12335 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12336 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
12337
12338 // insert rsbri in RSBBB
12339 // Note: BCC and rsbri will be converted into predicated rsbmi
12340 // by if-conversion pass
12341 BuildMI(*RSBBB, RSBBB->begin(), dl,
12342 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12343 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12344 .addImm(0)
12346 .add(condCodeOp());
12347
12348 // insert PHI in SinkBB,
12349 // reuse ABSDstReg to not change uses of ABS instruction
12350 BuildMI(*SinkBB, SinkBB->begin(), dl,
12351 TII->get(ARM::PHI), ABSDstReg)
12352 .addReg(NewRsbDstReg).addMBB(RSBBB)
12353 .addReg(ABSSrcReg).addMBB(BB);
12354
12355 // remove ABS instruction
12356 MI.eraseFromParent();
12357
12358 // return last added BB
12359 return SinkBB;
12360 }
12361 case ARM::COPY_STRUCT_BYVAL_I32:
12362 ++NumLoopByVals;
12363 return EmitStructByval(MI, BB);
12364 case ARM::WIN__CHKSTK:
12365 return EmitLowered__chkstk(MI, BB);
12366 case ARM::WIN__DBZCHK:
12367 return EmitLowered__dbzchk(MI, BB);
12368 }
12369}
12370
12371/// Attaches vregs to MEMCPY that it will use as scratch registers
12372/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12373/// instead of as a custom inserter because we need the use list from the SDNode.
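/// Each scratch register is appended to the MEMCPY as an extra operand; the number of scratch registers to add is taken from operand 4 of the instruction.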
12374static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12375 MachineInstr &MI, const SDNode *Node) {
12376 bool isThumb1 = Subtarget->isThumb1Only();
12377
12378 DebugLoc DL = MI.getDebugLoc();
12379 MachineFunction *MF = MI.getParent()->getParent();
12380 MachineRegisterInfo &MRI = MF->getRegInfo();
12381 MachineInstrBuilder MIB(*MF, MI);
12382
12383 // If the new dst/src is unused mark it as dead.
12384 if (!Node->hasAnyUseOfValue(0)) {
12385 MI.getOperand(0).setIsDead(true);
12386 }
12387 if (!Node->hasAnyUseOfValue(1)) {
12388 MI.getOperand(1).setIsDead(true);
12389 }
12390
12391 // The MEMCPY both defines and kills the scratch registers.
12392 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12393 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12394 : &ARM::GPRRegClass);
12395 MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
12396 }
12397}
12398
12399 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12400 SDNode *Node) const {
12401 if (MI.getOpcode() == ARM::MEMCPY) {
12402 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12403 return;
12404 }
12405
12406 const MCInstrDesc *MCID = &MI.getDesc();
12407 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12408 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12409 // operand is still set to noreg. If needed, set the optional operand's
12410 // register to CPSR, and remove the redundant implicit def.
12411 //
12412 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12413
12414 // Rename pseudo opcodes.
12415 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12416 unsigned ccOutIdx;
12417 if (NewOpc) {
12418 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12419 MCID = &TII->get(NewOpc);
12420
12421 assert(MCID->getNumOperands() ==
12422 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12423 && "converted opcode should be the same except for cc_out"
12424 " (and, on Thumb1, pred)");
12425
12426 MI.setDesc(*MCID);
12427
12428 // Add the optional cc_out operand
12429 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12430
12431 // On Thumb1, move all input operands to the end, then add the predicate
12432 if (Subtarget->isThumb1Only()) {
12433 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12434 MI.addOperand(MI.getOperand(1));
12435 MI.removeOperand(1);
12436 }
12437
12438 // Restore the ties
12439 for (unsigned i = MI.getNumOperands(); i--;) {
12440 const MachineOperand& op = MI.getOperand(i);
12441 if (op.isReg() && op.isUse()) {
12442 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12443 if (DefIdx != -1)
12444 MI.tieOperands(DefIdx, i);
12445 }
12446 }
12447
12448 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
12449 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12450 ccOutIdx = 1;
12451 } else
12452 ccOutIdx = MCID->getNumOperands() - 1;
12453 } else
12454 ccOutIdx = MCID->getNumOperands() - 1;
12455
12456 // Any ARM instruction that sets the 's' bit should specify an optional
12457 // "cc_out" operand in the last operand position.
12458 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12459 assert(!NewOpc && "Optional cc_out operand required");
12460 return;
12461 }
12462 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12463 // since we already have an optional CPSR def.
12464 bool definesCPSR = false;
12465 bool deadCPSR = false;
12466 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12467 ++i) {
12468 const MachineOperand &MO = MI.getOperand(i);
12469 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12470 definesCPSR = true;
12471 if (MO.isDead())
12472 deadCPSR = true;
12473 MI.removeOperand(i);
12474 break;
12475 }
12476 }
12477 if (!definesCPSR) {
12478 assert(!NewOpc && "Optional cc_out operand required");
12479 return;
12480 }
12481 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12482 if (deadCPSR) {
12483 assert(!MI.getOperand(ccOutIdx).getReg() &&
12484 "expect uninitialized optional cc_out operand");
12485 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12486 if (!Subtarget->isThumb1Only())
12487 return;
12488 }
12489
12490 // If this instruction was defined with an optional CPSR def and its dag node
12491 // had a live implicit CPSR def, then activate the optional CPSR def.
12492 MachineOperand &MO = MI.getOperand(ccOutIdx);
12493 MO.setReg(ARM::CPSR);
12494 MO.setIsDef(true);
12495}
12496
12497//===----------------------------------------------------------------------===//
12498// ARM Optimization Hooks
12499//===----------------------------------------------------------------------===//
12500
12501// Helper function that checks if N is a null or all ones constant.
12502static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12504}
12505
12506// Return true if N is conditionally 0 or all ones.
12507// Detects these expressions where cc is an i1 value:
12508//
12509// (select cc 0, y) [AllOnes=0]
12510// (select cc y, 0) [AllOnes=0]
12511// (zext cc) [AllOnes=0]
12512// (sext cc) [AllOnes=0/1]
12513// (select cc -1, y) [AllOnes=1]
12514// (select cc y, -1) [AllOnes=1]
12515//
12516// Invert is set when N is the null/all ones constant when CC is false.
12517// OtherOp is set to the alternative value of N.
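// For example, with AllOnes == false, (select cc, 0, y) yields Invert = false and OtherOp = y, while (select cc, y, 0) yields Invert = true and OtherOp = y.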
12518 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12519 SDValue &CC, bool &Invert,
12520 SDValue &OtherOp,
12521 SelectionDAG &DAG) {
12522 switch (N->getOpcode()) {
12523 default: return false;
12524 case ISD::SELECT: {
12525 CC = N->getOperand(0);
12526 SDValue N1 = N->getOperand(1);
12527 SDValue N2 = N->getOperand(2);
12528 if (isZeroOrAllOnes(N1, AllOnes)) {
12529 Invert = false;
12530 OtherOp = N2;
12531 return true;
12532 }
12533 if (isZeroOrAllOnes(N2, AllOnes)) {
12534 Invert = true;
12535 OtherOp = N1;
12536 return true;
12537 }
12538 return false;
12539 }
12540 case ISD::ZERO_EXTEND:
12541 // (zext cc) can never be the all ones value.
12542 if (AllOnes)
12543 return false;
12544 [[fallthrough]];
12545 case ISD::SIGN_EXTEND: {
12546 SDLoc dl(N);
12547 EVT VT = N->getValueType(0);
12548 CC = N->getOperand(0);
12549 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12550 return false;
12551 Invert = !AllOnes;
12552 if (AllOnes)
12553 // When looking for an AllOnes constant, N is an sext, and the 'other'
12554 // value is 0.
12555 OtherOp = DAG.getConstant(0, dl, VT);
12556 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12557 // When looking for a 0 constant, N can be zext or sext.
12558 OtherOp = DAG.getConstant(1, dl, VT);
12559 else
12560 OtherOp = DAG.getAllOnesConstant(dl, VT);
12561 return true;
12562 }
12563 }
12564}
12565
12566// Combine a constant select operand into its use:
12567//
12568// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12569// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12570// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12571// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12572// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12573//
12574// The transform is rejected if the select doesn't have a constant operand that
12575// is null, or all ones when AllOnes is set.
12576//
12577// Also recognize sext/zext from i1:
12578//
12579// (add (zext cc), x) -> (select cc (add x, 1), x)
12580// (add (sext cc), x) -> (select cc (add x, -1), x)
12581//
12582// These transformations eventually create predicated instructions.
12583//
12584// @param N The node to transform.
12585// @param Slct The N operand that is a select.
12586// @param OtherOp The other N operand (x above).
12587// @param DCI Context.
12588// @param AllOnes Require the select constant to be all ones instead of null.
12589// @returns The new node, or SDValue() on failure.
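// For example, (add (select cc, 0, 7), x) becomes (select cc, x, (add x, 7)), which can later be emitted as a predicated add.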
12590static
12591 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12592 TargetLowering::DAGCombinerInfo &DCI,
12593 bool AllOnes = false) {
12594 SelectionDAG &DAG = DCI.DAG;
12595 EVT VT = N->getValueType(0);
12596 SDValue NonConstantVal;
12597 SDValue CCOp;
12598 bool SwapSelectOps;
12599 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12600 NonConstantVal, DAG))
12601 return SDValue();
12602
12603 // Slct is now known to be the desired identity constant when CC is true.
12604 SDValue TrueVal = OtherOp;
12605 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12606 OtherOp, NonConstantVal);
12607 // Unless SwapSelectOps says CC should be false.
12608 if (SwapSelectOps)
12609 std::swap(TrueVal, FalseVal);
12610
12611 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12612 CCOp, TrueVal, FalseVal);
12613}
12614
12615// Attempt combineSelectAndUse on each operand of a commutative operator N.
12616static
12617 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12618 TargetLowering::DAGCombinerInfo &DCI) {
12619 SDValue N0 = N->getOperand(0);
12620 SDValue N1 = N->getOperand(1);
12621 if (N0.getNode()->hasOneUse())
12622 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12623 return Result;
12624 if (N1.getNode()->hasOneUse())
12625 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12626 return Result;
12627 return SDValue();
12628}
12629
12630 static bool IsVUZPShuffleNode(SDNode *N) {
12631 // VUZP shuffle node.
12632 if (N->getOpcode() == ARMISD::VUZP)
12633 return true;
12634
12635 // "VUZP" on i32 is an alias for VTRN.
12636 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12637 return true;
12638
12639 return false;
12640}
12641
12642 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12643 TargetLowering::DAGCombinerInfo &DCI,
12644 const ARMSubtarget *Subtarget) {
12645 // Look for ADD(VUZP.0, VUZP.1).
12646 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12647 N0 == N1)
12648 return SDValue();
12649
12650 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12651 if (!N->getValueType(0).is64BitVector())
12652 return SDValue();
12653
12654 // Generate vpadd.
12655 SelectionDAG &DAG = DCI.DAG;
12656 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12657 SDLoc dl(N);
12658 SDNode *Unzip = N0.getNode();
12659 EVT VT = N->getValueType(0);
12660
12661 SmallVector<SDValue, 8> Ops;
12662 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12663 TLI.getPointerTy(DAG.getDataLayout())));
12664 Ops.push_back(Unzip->getOperand(0));
12665 Ops.push_back(Unzip->getOperand(1));
12666
12667 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12668}
12669
12670 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12671 TargetLowering::DAGCombinerInfo &DCI,
12672 const ARMSubtarget *Subtarget) {
12673 // Check for two extended operands.
12674 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12675 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12676 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12677 N1.getOpcode() == ISD::ZERO_EXTEND))
12678 return SDValue();
12679
12680 SDValue N00 = N0.getOperand(0);
12681 SDValue N10 = N1.getOperand(0);
12682
12683 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12684 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12685 N00 == N10)
12686 return SDValue();
12687
12688 // We only recognize Q register paddl here; this can't be reached until
12689 // after type legalization.
12690 if (!N00.getValueType().is64BitVector() ||
12692 return SDValue();
12693
12694 // Generate vpaddl.
12695 SelectionDAG &DAG = DCI.DAG;
12696 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12697 SDLoc dl(N);
12698 EVT VT = N->getValueType(0);
12699
12700 SmallVector<SDValue, 8> Ops;
12701 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12702 unsigned Opcode;
12703 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12704 Opcode = Intrinsic::arm_neon_vpaddls;
12705 else
12706 Opcode = Intrinsic::arm_neon_vpaddlu;
12707 Ops.push_back(DAG.getConstant(Opcode, dl,
12708 TLI.getPointerTy(DAG.getDataLayout())));
12709 EVT ElemTy = N00.getValueType().getVectorElementType();
12710 unsigned NumElts = VT.getVectorNumElements();
12711 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12712 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12713 N00.getOperand(0), N00.getOperand(1));
12714 Ops.push_back(Concat);
12715
12716 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12717}
12718
12719// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12720// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12721// much easier to match.
12722static SDValue
12723 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12724 TargetLowering::DAGCombinerInfo &DCI,
12725 const ARMSubtarget *Subtarget) {
12726 // Only perform optimization if after legalize, and if NEON is available. We
12727 // also expect both operands to be BUILD_VECTORs.
12728 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12729 || N0.getOpcode() != ISD::BUILD_VECTOR
12730 || N1.getOpcode() != ISD::BUILD_VECTOR)
12731 return SDValue();
12732
12733 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12734 EVT VT = N->getValueType(0);
12735 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12736 return SDValue();
12737
12738 // Check that the vector operands are of the right form.
12739 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12740 // operands, where N is the size of the formed vector.
12741 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12742 // index such that we have a pairwise add pattern.
12743
12744 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12746 return SDValue();
12747 SDValue Vec = N0->getOperand(0)->getOperand(0);
12748 SDNode *V = Vec.getNode();
12749 unsigned nextIndex = 0;
12750
12751 // For each operands to the ADD which are BUILD_VECTORs,
12752 // check to see if each of their operands are an EXTRACT_VECTOR with
12753 // the same vector and appropriate index.
12754 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12757
12758 SDValue ExtVec0 = N0->getOperand(i);
12759 SDValue ExtVec1 = N1->getOperand(i);
12760
12761 // First operand is the vector; verify it's the same.
12762 if (V != ExtVec0->getOperand(0).getNode() ||
12763 V != ExtVec1->getOperand(0).getNode())
12764 return SDValue();
12765
12766 // Second is the constant; verify it's correct.
12767 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12768 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12769
12770 // For the constant, we want to see all the even or all the odd.
12771 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12772 || C1->getZExtValue() != nextIndex+1)
12773 return SDValue();
12774
12775 // Increment index.
12776 nextIndex+=2;
12777 } else
12778 return SDValue();
12779 }
12780
12781 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12782 // we're using the entire input vector, otherwise there's a size/legality
12783 // mismatch somewhere.
12784 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12786 return SDValue();
12787
12788 // Create VPADDL node.
12789 SelectionDAG &DAG = DCI.DAG;
12790 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12791
12792 SDLoc dl(N);
12793
12794 // Build operand list.
12795 SmallVector<SDValue, 8> Ops;
12796 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12797 TLI.getPointerTy(DAG.getDataLayout())));
12798
12799 // Input is the vector.
12800 Ops.push_back(Vec);
12801
12802 // Get widened type and narrowed type.
12803 MVT widenType;
12804 unsigned numElem = VT.getVectorNumElements();
12805
12806 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12807 switch (inputLaneType.getSimpleVT().SimpleTy) {
12808 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12809 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12810 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12811 default:
12812 llvm_unreachable("Invalid vector element type for padd optimization.");
12813 }
12814
12815 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12816 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12817 return DAG.getNode(ExtOp, dl, VT, tmp);
12818}
12819
12820 static SDValue findMUL_LOHI(SDValue V) {
12821 if (V->getOpcode() == ISD::UMUL_LOHI ||
12822 V->getOpcode() == ISD::SMUL_LOHI)
12823 return V;
12824 return SDValue();
12825}
12826
12827static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12828 TargetLowering::DAGCombinerInfo &DCI,
12829 const ARMSubtarget *Subtarget) {
12830 if (!Subtarget->hasBaseDSP())
12831 return SDValue();
12832
12833 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12834 // accumulates the product into a 64-bit value. The 16-bit values will
12835 // be sign extended somehow or SRA'd into 32-bit values
12836 // (addc (adde (mul 16bit, 16bit), lo), hi)
12837 SDValue Mul = AddcNode->getOperand(0);
12838 SDValue Lo = AddcNode->getOperand(1);
12839 if (Mul.getOpcode() != ISD::MUL) {
12840 Lo = AddcNode->getOperand(0);
12841 Mul = AddcNode->getOperand(1);
12842 if (Mul.getOpcode() != ISD::MUL)
12843 return SDValue();
12844 }
12845
12846 SDValue SRA = AddeNode->getOperand(0);
12847 SDValue Hi = AddeNode->getOperand(1);
12848 if (SRA.getOpcode() != ISD::SRA) {
12849 SRA = AddeNode->getOperand(1);
12850 Hi = AddeNode->getOperand(0);
12851 if (SRA.getOpcode() != ISD::SRA)
12852 return SDValue();
12853 }
12854 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12855 if (Const->getZExtValue() != 31)
12856 return SDValue();
12857 } else
12858 return SDValue();
12859
12860 if (SRA.getOperand(0) != Mul)
12861 return SDValue();
12862
12863 SelectionDAG &DAG = DCI.DAG;
12864 SDLoc dl(AddcNode);
12865 unsigned Opcode = 0;
12866 SDValue Op0;
12867 SDValue Op1;
12868
12869 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12870 Opcode = ARMISD::SMLALBB;
12871 Op0 = Mul.getOperand(0);
12872 Op1 = Mul.getOperand(1);
12873 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12874 Opcode = ARMISD::SMLALBT;
12875 Op0 = Mul.getOperand(0);
12876 Op1 = Mul.getOperand(1).getOperand(0);
12877 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12878 Opcode = ARMISD::SMLALTB;
12879 Op0 = Mul.getOperand(0).getOperand(0);
12880 Op1 = Mul.getOperand(1);
12881 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12882 Opcode = ARMISD::SMLALTT;
12883 Op0 = Mul->getOperand(0).getOperand(0);
12884 Op1 = Mul->getOperand(1).getOperand(0);
12885 }
12886
12887 if (!Op0 || !Op1)
12888 return SDValue();
12889
12890 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12891 Op0, Op1, Lo, Hi);
12892 // Replace the ADDs' nodes uses by the MLA node's values.
12893 SDValue HiMLALResult(SMLAL.getNode(), 1);
12894 SDValue LoMLALResult(SMLAL.getNode(), 0);
12895
12896 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12897 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12898
12899 // Return original node to notify the driver to stop replacing.
12900 SDValue resNode(AddcNode, 0);
12901 return resNode;
12902}
12903
12904 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12905 TargetLowering::DAGCombinerInfo &DCI,
12906 const ARMSubtarget *Subtarget) {
12907 // Look for multiply add opportunities.
12908 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
12909 // each add node consumes a value from ISD::UMUL_LOHI and there is
12910 // a glue link from the first add to the second add.
12911 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12912 // a S/UMLAL instruction.
12913 //                  UMUL_LOHI
12914 //                 / :lo    \ :hi
12915 //                V          \          [no multiline comment]
12916 //    loAdd ->  ADDC         |
12917 //                 \  :carry /
12918 //                  V       V
12919 //                    ADDE <- hiAdd
12920 //
12921 // In the special case where only the higher part of a signed result is used
12922 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12923 // a constant with the exact value of 0x80000000, we recognize we are dealing
12924 // with a "rounded multiply and add" (or subtract) and transform it into
12925 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
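// In effect, adding 0x80000000 to the low half and folding the carry into the high half rounds the top 32 bits of the 64-bit product, which is what SMMLAR/SMMLSR compute.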
12926
12927 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12928 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12929 "Expect an ADDE or SUBE");
12930
12931 assert(AddeSubeNode->getNumOperands() == 3 &&
12932 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12933 "ADDE node has the wrong inputs");
12934
12935 // Check that we are chained to the right ADDC or SUBC node.
12936 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12937 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12938 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12939 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12940 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12941 return SDValue();
12942
12943 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12944 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12945
12946 // Check if the two operands are from the same mul_lohi node.
12947 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12948 return SDValue();
12949
12950 assert(AddcSubcNode->getNumValues() == 2 &&
12951 AddcSubcNode->getValueType(0) == MVT::i32 &&
12952 "Expect ADDC with two result values. First: i32");
12953
12954 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12955 // may be an SMLAL which multiplies two 16-bit values.
12956 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12957 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12958 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12959 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12960 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12961 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12962
12963 // Check for the triangle shape.
12964 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12965 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12966
12967 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12968 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12969 return SDValue();
12970
12971 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12972 bool IsLeftOperandMUL = false;
12973 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12974 if (MULOp == SDValue())
12975 MULOp = findMUL_LOHI(AddeSubeOp1);
12976 else
12977 IsLeftOperandMUL = true;
12978 if (MULOp == SDValue())
12979 return SDValue();
12980
12981 // Figure out the right opcode.
12982 unsigned Opc = MULOp->getOpcode();
12983 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12984
12985 // Figure out the high and low input values to the MLAL node.
12986 SDValue *HiAddSub = nullptr;
12987 SDValue *LoMul = nullptr;
12988 SDValue *LowAddSub = nullptr;
12989
12990 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
12991 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12992 return SDValue();
12993
12994 if (IsLeftOperandMUL)
12995 HiAddSub = &AddeSubeOp1;
12996 else
12997 HiAddSub = &AddeSubeOp0;
12998
12999 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
13000 // whose low result is fed to the ADDC/SUBC we are checking.
13001
13002 if (AddcSubcOp0 == MULOp.getValue(0)) {
13003 LoMul = &AddcSubcOp0;
13004 LowAddSub = &AddcSubcOp1;
13005 }
13006 if (AddcSubcOp1 == MULOp.getValue(0)) {
13007 LoMul = &AddcSubcOp1;
13008 LowAddSub = &AddcSubcOp0;
13009 }
13010
13011 if (!LoMul)
13012 return SDValue();
13013
13014 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
13015 // the replacement below will create a cycle.
13016 if (AddcSubcNode == HiAddSub->getNode() ||
13017 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13018 return SDValue();
13019
13020 // Create the merged node.
13021 SelectionDAG &DAG = DCI.DAG;
13022
13023 // Start building operand list.
13024 SmallVector<SDValue, 8> Ops;
13025 Ops.push_back(LoMul->getOperand(0));
13026 Ops.push_back(LoMul->getOperand(1));
13027
13028 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13029 // the case, we must be doing signed multiplication and only use the higher
13030 // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
13031 // addition or subtraction with the value of 0x80000000.
13032 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13033 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13034 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13035 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13036 0x80000000) {
13037 Ops.push_back(*HiAddSub);
13038 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13039 FinalOpc = ARMISD::SMMLSR;
13040 } else {
13041 FinalOpc = ARMISD::SMMLAR;
13042 }
13043 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13044 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13045
13046 return SDValue(AddeSubeNode, 0);
13047 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13048 // SMMLS is generated during instruction selection and the rest of this
13049 // function can not handle the case where AddcSubcNode is a SUBC.
13050 return SDValue();
13051
13052 // Finish building the operand list for {U/S}MLAL
13053 Ops.push_back(*LowAddSub);
13054 Ops.push_back(*HiAddSub);
13055
13056 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13057 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13058
13059 // Replace the ADDs' nodes uses by the MLA node's values.
13060 SDValue HiMLALResult(MLALNode.getNode(), 1);
13061 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13062
13063 SDValue LoMLALResult(MLALNode.getNode(), 0);
13064 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13065
13066 // Return original node to notify the driver to stop replacing.
13067 return SDValue(AddeSubeNode, 0);
13068}
13069
13070static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13071 TargetLowering::DAGCombinerInfo &DCI,
13072 const ARMSubtarget *Subtarget) {
13073 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13074 // While trying to combine for the other MLAL nodes, first search for the
13075 // chance to use UMAAL. Check if Addc uses a node which has already
13076 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13077 // as the addend, and it's handled in PerformUMLALCombine.
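// (UMAAL computes RdLo,RdHi = Rn * Rm + RdLo + RdHi, i.e. an unsigned
// 32x32->64 multiply with two independent 32-bit accumulate operands.)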
13078
13079 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13080 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13081
13082 // Check that we have a glued ADDC node.
13083 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13084 if (AddcNode->getOpcode() != ARMISD::ADDC)
13085 return SDValue();
13086
13087 // Find the converted UMAAL or quit if it doesn't exist.
13088 SDNode *UmlalNode = nullptr;
13089 SDValue AddHi;
13090 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13091 UmlalNode = AddcNode->getOperand(0).getNode();
13092 AddHi = AddcNode->getOperand(1);
13093 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13094 UmlalNode = AddcNode->getOperand(1).getNode();
13095 AddHi = AddcNode->getOperand(0);
13096 } else {
13097 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13098 }
13099
13100 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13101 // the ADDC as well as Zero.
13102 if (!isNullConstant(UmlalNode->getOperand(3)))
13103 return SDValue();
13104
13105 if ((isNullConstant(AddeNode->getOperand(0)) &&
13106 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13107 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13108 isNullConstant(AddeNode->getOperand(1)))) {
13109 SelectionDAG &DAG = DCI.DAG;
13110 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13111 UmlalNode->getOperand(2), AddHi };
13112 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13113 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13114
13115 // Replace the ADDC/ADDE nodes' uses with the UMAAL node's values.
13116 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13117 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13118
13119 // Return original node to notify the driver to stop replacing.
13120 return SDValue(AddeNode, 0);
13121 }
13122 return SDValue();
13123}
13124
13125static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13126 const ARMSubtarget *Subtarget) {
13127 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13128 return SDValue();
13129
13130 // Check that we have a pair of ADDC and ADDE as operands.
13131 // Both addends of the ADDE must be zero.
13132 SDNode* AddcNode = N->getOperand(2).getNode();
13133 SDNode* AddeNode = N->getOperand(3).getNode();
13134 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13135 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13136 isNullConstant(AddeNode->getOperand(0)) &&
13137 isNullConstant(AddeNode->getOperand(1)) &&
13138 (AddeNode->getOperand(2).getNode() == AddcNode))
13139 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13140 DAG.getVTList(MVT::i32, MVT::i32),
13141 {N->getOperand(0), N->getOperand(1),
13142 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13143 else
13144 return SDValue();
13145}
13146
13147static SDValue PerformAddcSubcCombine(SDNode *N,
13148 TargetLowering::DAGCombinerInfo &DCI,
13149 const ARMSubtarget *Subtarget) {
13150 SelectionDAG &DAG(DCI.DAG);
13151
13152 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13153 // (SUBC (ADDE 0, 0, C), 1) -> C
13154 SDValue LHS = N->getOperand(0);
13155 SDValue RHS = N->getOperand(1);
13156 if (LHS->getOpcode() == ARMISD::ADDE &&
13157 isNullConstant(LHS->getOperand(0)) &&
13158 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13159 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13160 }
13161 }
13162
13163 if (Subtarget->isThumb1Only()) {
13164 SDValue RHS = N->getOperand(1);
13165 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13166 int32_t imm = C->getSExtValue();
13167 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13168 SDLoc DL(N);
13169 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13170 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13171 : ARMISD::ADDC;
13172 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13173 }
13174 }
13175 }
13176
13177 return SDValue();
13178}
13179
13180static SDValue PerformAddeSubeCombine(SDNode *N,
13181 TargetLowering::DAGCombinerInfo &DCI,
13182 const ARMSubtarget *Subtarget) {
13183 if (Subtarget->isThumb1Only()) {
13184 SelectionDAG &DAG = DCI.DAG;
13185 SDValue RHS = N->getOperand(1);
13186 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13187 int64_t imm = C->getSExtValue();
13188 if (imm < 0) {
13189 SDLoc DL(N);
13190
13191 // The with-carry-in form matches bitwise not instead of the negation.
13192 // Effectively, the inverse interpretation of the carry flag already
13193 // accounts for part of the negation.
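// For example, with imm == -5 (so ~imm == 4): x + (-5) + carry equals
// x - 4 - (1 - carry), which is exactly the subtract-with-carry form.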
13194 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13195
13196 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13197 : ARMISD::ADDE;
13198 return DAG.getNode(Opcode, DL, N->getVTList(),
13199 N->getOperand(0), RHS, N->getOperand(2));
13200 }
13201 }
13202 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13203 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13204 }
13205 return SDValue();
13206}
13207
13208static SDValue PerformSELECTCombine(SDNode *N,
13209 TargetLowering::DAGCombinerInfo &DCI,
13210 const ARMSubtarget *Subtarget) {
13211 if (!Subtarget->hasMVEIntegerOps())
13212 return SDValue();
13213
13214 SDLoc dl(N);
13215 SDValue SetCC;
13216 SDValue LHS;
13217 SDValue RHS;
13218 ISD::CondCode CC = ISD::SETCC_INVALID;
13219 SDValue TrueVal;
13220 SDValue FalseVal;
13221
13222 if (N->getOpcode() == ISD::SELECT &&
13223 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13224 SetCC = N->getOperand(0);
13225 LHS = SetCC->getOperand(0);
13226 RHS = SetCC->getOperand(1);
13227 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13228 TrueVal = N->getOperand(1);
13229 FalseVal = N->getOperand(2);
13230 } else if (N->getOpcode() == ISD::SELECT_CC) {
13231 LHS = N->getOperand(0);
13232 RHS = N->getOperand(1);
13233 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13234 TrueVal = N->getOperand(2);
13235 FalseVal = N->getOperand(3);
13236 } else {
13237 return SDValue();
13238 }
13239
13240 unsigned int Opcode = 0;
13241 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13242 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13243 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13244 Opcode = ARMISD::VMINVu;
13245 if (CC == ISD::SETUGT)
13246 std::swap(TrueVal, FalseVal);
13247 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13248 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13249 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13250 Opcode = ARMISD::VMINVs;
13251 if (CC == ISD::SETGT)
13252 std::swap(TrueVal, FalseVal);
13253 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13254 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13255 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13256 Opcode = ARMISD::VMAXVu;
13257 if (CC == ISD::SETULT)
13258 std::swap(TrueVal, FalseVal);
13259 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13260 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13261 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13262 Opcode = ARMISD::VMAXVs;
13263 if (CC == ISD::SETLT)
13264 std::swap(TrueVal, FalseVal);
13265 } else
13266 return SDValue();
13267
13268 // Normalise to the right hand side being the vector reduction
13269 switch (TrueVal->getOpcode()) {
13270 case ISD::VECREDUCE_UMIN:
13271 case ISD::VECREDUCE_SMIN:
13272 case ISD::VECREDUCE_UMAX:
13273 case ISD::VECREDUCE_SMAX:
13274 std::swap(LHS, RHS);
13275 std::swap(TrueVal, FalseVal);
13276 break;
13277 }
13278
13279 EVT VectorType = FalseVal->getOperand(0).getValueType();
13280
13281 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13282 VectorType != MVT::v4i32)
13283 return SDValue();
13284
13285 EVT VectorScalarType = VectorType.getVectorElementType();
13286
13287 // The values being selected must also be the ones being compared
13288 if (TrueVal != LHS || FalseVal != RHS)
13289 return SDValue();
13290
13291 EVT LeftType = LHS->getValueType(0);
13292 EVT RightType = RHS->getValueType(0);
13293
13294 // The types must match the reduced type too
13295 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13296 return SDValue();
13297
13298 // Legalise the scalar to an i32
13299 if (VectorScalarType != MVT::i32)
13300 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13301
13302 // Generate the reduction as an i32 for legalisation purposes
13303 auto Reduction =
13304 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13305
13306 // The result isn't actually an i32 so truncate it back to its original type
13307 if (VectorScalarType != MVT::i32)
13308 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13309
13310 return Reduction;
13311}
13312
13313// A special combine for the vqdmulh family of instructions. This is one of the
13314// potential set of patterns that could match this instruction. The base pattern
13315// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13316// This matches the variant min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13317// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13318// the max is unnecessary.
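// For example, for i16 lanes the clamp constant is 32767 and the shift is 15,
// so smin(sra(mul(sext(a), sext(b)), 15), 32767) is rewritten to VQDMULH(a, b):
// (a * b) >> 15 equals (2 * a * b) >> 16, and the smin supplies the saturation
// for the single overflowing case a == b == -32768.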
13319static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13320 EVT VT = N->getValueType(0);
13321 SDValue Shft;
13322 ConstantSDNode *Clamp;
13323
13324 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13325 return SDValue();
13326
13327 if (N->getOpcode() == ISD::SMIN) {
13328 Shft = N->getOperand(0);
13329 Clamp = isConstOrConstSplat(N->getOperand(1));
13330 } else if (N->getOpcode() == ISD::VSELECT) {
13331 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13332 SDValue Cmp = N->getOperand(0);
13333 if (Cmp.getOpcode() != ISD::SETCC ||
13334 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13335 Cmp.getOperand(0) != N->getOperand(1) ||
13336 Cmp.getOperand(1) != N->getOperand(2))
13337 return SDValue();
13338 Shft = N->getOperand(1);
13339 Clamp = isConstOrConstSplat(N->getOperand(2));
13340 } else
13341 return SDValue();
13342
13343 if (!Clamp)
13344 return SDValue();
13345
13346 MVT ScalarType;
13347 int ShftAmt = 0;
13348 switch (Clamp->getSExtValue()) {
13349 case (1 << 7) - 1:
13350 ScalarType = MVT::i8;
13351 ShftAmt = 7;
13352 break;
13353 case (1 << 15) - 1:
13354 ScalarType = MVT::i16;
13355 ShftAmt = 15;
13356 break;
13357 case (1ULL << 31) - 1:
13358 ScalarType = MVT::i32;
13359 ShftAmt = 31;
13360 break;
13361 default:
13362 return SDValue();
13363 }
13364
13365 if (Shft.getOpcode() != ISD::SRA)
13366 return SDValue();
13367 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13368 if (!N1 || N1->getSExtValue() != ShftAmt)
13369 return SDValue();
13370
13371 SDValue Mul = Shft.getOperand(0);
13372 if (Mul.getOpcode() != ISD::MUL)
13373 return SDValue();
13374
13375 SDValue Ext0 = Mul.getOperand(0);
13376 SDValue Ext1 = Mul.getOperand(1);
13377 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13378 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13379 return SDValue();
13380 EVT VecVT = Ext0.getOperand(0).getValueType();
13381 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13382 return SDValue();
13383 if (Ext1.getOperand(0).getValueType() != VecVT ||
13384 VecVT.getScalarType() != ScalarType ||
13385 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13386 return SDValue();
13387
13388 SDLoc DL(Mul);
13389 unsigned LegalLanes = 128 / (ShftAmt + 1);
13390 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13391 // For types smaller than legal vectors, extend to be legal and only use the
13392 // needed lanes.
13393 if (VecVT.getSizeInBits() < 128) {
13394 EVT ExtVecVT =
13395 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13396 VecVT.getVectorNumElements());
13397 SDValue Inp0 =
13398 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13399 SDValue Inp1 =
13400 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13401 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13402 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13403 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13404 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13405 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13406 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13407 }
13408
13409 // For larger types, split into legal sized chunks.
13410 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13411 unsigned NumParts = VecVT.getSizeInBits() / 128;
13412 SmallVector<SDValue> Parts;
13413 for (unsigned I = 0; I < NumParts; ++I) {
13414 SDValue Inp0 =
13415 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13416 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13417 SDValue Inp1 =
13418 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13419 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13420 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13421 Parts.push_back(VQDMULH);
13422 }
13423 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13424 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13425}
13426
13427static SDValue PerformVSELECTCombine(SDNode *N,
13428 TargetLowering::DAGCombinerInfo &DCI,
13429 const ARMSubtarget *Subtarget) {
13430 if (!Subtarget->hasMVEIntegerOps())
13431 return SDValue();
13432
13433 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13434 return V;
13435
13436 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13437 //
13438 // We need to re-implement this optimization here as the implementation in the
13439 // Target-Independent DAGCombiner does not handle the kind of constant we make
13440 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13441 // good reason, allowing truncation there would break other targets).
13442 //
13443 // Currently, this is only done for MVE, as it's the only target that benefits
13444 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13445 if (N->getOperand(0).getOpcode() != ISD::XOR)
13446 return SDValue();
13447 SDValue XOR = N->getOperand(0);
13448
13449 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13450 // It is important to check with truncation allowed as the BUILD_VECTORs we
13451 // generate in those situations will truncate their operands.
13452 ConstantSDNode *Const =
13453 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13454 /*AllowTruncation*/ true);
13455 if (!Const || !Const->isOne())
13456 return SDValue();
13457
13458 // Rewrite into vselect(cond, rhs, lhs).
13459 SDValue Cond = XOR->getOperand(0);
13460 SDValue LHS = N->getOperand(1);
13461 SDValue RHS = N->getOperand(2);
13462 EVT Type = N->getValueType(0);
13463 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13464}
13465
13466// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
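// e.g. for v4i1, setcc(build_vector(0,1,2,3), splat(n), ult) is true exactly
// for the first n lanes, which is the predicate that VCTP32 produces.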
13469 const ARMSubtarget *Subtarget) {
13470 SDValue Op0 = N->getOperand(0);
13471 SDValue Op1 = N->getOperand(1);
13472 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13473 EVT VT = N->getValueType(0);
13474
13475 if (!Subtarget->hasMVEIntegerOps() ||
13477 return SDValue();
13478
13479 if (CC == ISD::SETUGE) {
13480 std::swap(Op0, Op1);
13481 CC = ISD::SETULT;
13482 }
13483
13484 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13485 Op0.getOpcode() != ISD::BUILD_VECTOR)
13486 return SDValue();
13487
13488 // Check first operand is BuildVector of 0,1,2,...
13489 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13490 if (!Op0.getOperand(I).isUndef() &&
13491 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13492 Op0.getConstantOperandVal(I) == I))
13493 return SDValue();
13494 }
13495
13496 // The second is a Splat of Op1S
13497 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13498 if (!Op1S)
13499 return SDValue();
13500
13501 unsigned Opc;
13502 switch (VT.getVectorNumElements()) {
13503 case 2:
13504 Opc = Intrinsic::arm_mve_vctp64;
13505 break;
13506 case 4:
13507 Opc = Intrinsic::arm_mve_vctp32;
13508 break;
13509 case 8:
13510 Opc = Intrinsic::arm_mve_vctp16;
13511 break;
13512 case 16:
13513 Opc = Intrinsic::arm_mve_vctp8;
13514 break;
13515 default:
13516 return SDValue();
13517 }
13518
13519 SDLoc DL(N);
13520 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13521 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13522 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13523}
13524
13525/// PerformADDECombine - Target-specific dag combine transform from
13526/// ARMISD::ADDC, ARMISD::ADDE, and ISD::SMUL_LOHI/UMUL_LOHI to MLAL or
13527/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13528static SDValue PerformADDECombine(SDNode *N,
13529 TargetLowering::DAGCombinerInfo &DCI,
13530 const ARMSubtarget *Subtarget) {
13531 // Only ARM and Thumb2 support UMLAL/SMLAL.
13532 if (Subtarget->isThumb1Only())
13533 return PerformAddeSubeCombine(N, DCI, Subtarget);
13534
13535 // Only perform the checks after legalize when the pattern is available.
13536 if (DCI.isBeforeLegalize()) return SDValue();
13537
13538 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13539}
13540
13541/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13542/// operands N0 and N1. This is a helper for PerformADDCombine that is
13543/// called with the default operands, and if that fails, with commuted
13544/// operands.
13545static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13546 TargetLowering::DAGCombinerInfo &DCI,
13547 const ARMSubtarget *Subtarget) {
13548 // Attempt to create vpadd for this add.
13549 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13550 return Result;
13551
13552 // Attempt to create vpaddl for this add.
13553 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13554 return Result;
13555 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13556 Subtarget))
13557 return Result;
13558
13559 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13560 if (N0.getNode()->hasOneUse())
13561 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13562 return Result;
13563 return SDValue();
13564}
13565
13567 EVT VT = N->getValueType(0);
13568 SDValue N0 = N->getOperand(0);
13569 SDValue N1 = N->getOperand(1);
13570 SDLoc dl(N);
13571
13572 auto IsVecReduce = [](SDValue Op) {
13573 switch (Op.getOpcode()) {
13574 case ISD::VECREDUCE_ADD:
13575 case ARMISD::VADDVs:
13576 case ARMISD::VADDVu:
13577 case ARMISD::VMLAVs:
13578 case ARMISD::VMLAVu:
13579 return true;
13580 }
13581 return false;
13582 };
13583
13584 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13585 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13586 // add(add(X, vecreduce(Y)), vecreduce(Z))
13587 // to make better use of vaddva style instructions.
13588 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13589 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13590 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13591 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13592 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13593 }
13594 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13595 // add(add(add(A, C), reduce(B)), reduce(D))
13596 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13597 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13598 unsigned N0RedOp = 0;
13599 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13600 N0RedOp = 1;
13601 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13602 return SDValue();
13603 }
13604
13605 unsigned N1RedOp = 0;
13606 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13607 N1RedOp = 1;
13608 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13609 return SDValue();
13610
13611 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13612 N1.getOperand(1 - N1RedOp));
13613 SDValue Add1 =
13614 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13615 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13616 }
13617 return SDValue();
13618 };
13619 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13620 return R;
13621 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13622 return R;
13623
13624 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13625 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13626 // by ascending load offsets. This can help cores prefetch if the order of
13627 // loads is more predictable.
13628 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13629 // Check if two reductions are known to load data where one is before/after
13630 // another. Return negative if N0 loads data before N1, positive if N1 is
13631 // before N0, and 0 if nothing is known.
13632 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13633 // Look through to the first operand of a MUL, for the VMLA case.
13634 // Currently only looks at the first operand, in the hope they are equal.
13635 if (N0.getOpcode() == ISD::MUL)
13636 N0 = N0.getOperand(0);
13637 if (N1.getOpcode() == ISD::MUL)
13638 N1 = N1.getOperand(0);
13639
13640 // Return true if the two operands are loads to the same object and the
13641 // offset of the first is known to be less than the offset of the second.
13642 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13643 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13644 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13645 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13646 Load1->isIndexed())
13647 return 0;
13648
13649 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13650 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13651
13652 if (!BaseLocDecomp0.getBase() ||
13653 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13654 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13655 return 0;
13656 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13657 return -1;
13658 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13659 return 1;
13660 return 0;
13661 };
13662
13663 SDValue X;
13664 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13665 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13666 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13667 N0.getOperand(1).getOperand(0));
13668 if (IsBefore < 0) {
13669 X = N0.getOperand(0);
13670 N0 = N0.getOperand(1);
13671 } else if (IsBefore > 0) {
13672 X = N0.getOperand(1);
13673 N0 = N0.getOperand(0);
13674 } else
13675 return SDValue();
13676 } else if (IsVecReduce(N0.getOperand(0))) {
13677 X = N0.getOperand(1);
13678 N0 = N0.getOperand(0);
13679 } else if (IsVecReduce(N0.getOperand(1))) {
13680 X = N0.getOperand(0);
13681 N0 = N0.getOperand(1);
13682 } else
13683 return SDValue();
13684 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13685 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13686 // Note this is backward to how you would expect. We create
13687 // add(reduce(load + 16), reduce(load + 0)) so that the
13688 // add(reduce(load+16), X) is combined into VADDVA(X, load+16), leaving
13689 // the X as VADDV(load + 0).
13690 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13691 } else
13692 return SDValue();
13693
13694 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13695 return SDValue();
13696
13697 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13698 return SDValue();
13699
13700 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13701 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13702 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13703 };
13704 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13705 return R;
13706 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13707 return R;
13708 return SDValue();
13709}
13710
13711static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13712 const ARMSubtarget *Subtarget) {
13713 if (!Subtarget->hasMVEIntegerOps())
13714 return SDValue();
13715
13717 return R;
13718
13719 EVT VT = N->getValueType(0);
13720 SDValue N0 = N->getOperand(0);
13721 SDValue N1 = N->getOperand(1);
13722 SDLoc dl(N);
13723
13724 if (VT != MVT::i64)
13725 return SDValue();
13726
13727 // We are looking for an i64 add of a VADDLVx. Due to these being i64s, this
13728 // will look like:
13729 // t1: i32,i32 = ARMISD::VADDLVs x
13730 // t2: i64 = build_pair t1, t1:1
13731 // t3: i64 = add t2, y
13732 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13733 // the add to be simplified separately.
13734 // We also need to check for sext / zext and commutative adds.
13735 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13736 SDValue NB) {
13737 if (NB->getOpcode() != ISD::BUILD_PAIR)
13738 return SDValue();
13739 SDValue VecRed = NB->getOperand(0);
13740 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13741 VecRed.getResNo() != 0 ||
13742 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13743 return SDValue();
13744
13745 if (VecRed->getOpcode() == OpcodeA) {
13746 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13747 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13748 VecRed.getOperand(0), VecRed.getOperand(1));
13749 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13750 }
13751
13752 SmallVector<SDValue, 4> Ops(2);
13753 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13754
13755 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13756 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13757 Ops.push_back(VecRed->getOperand(I));
13758 SDValue Red =
13759 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13760 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13761 SDValue(Red.getNode(), 1));
13762 };
13763
13764 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13765 return M;
13766 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13767 return M;
13768 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13769 return M;
13770 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13771 return M;
13772 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13773 return M;
13774 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13775 return M;
13776 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13777 return M;
13778 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13779 return M;
13780 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13781 return M;
13782 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13783 return M;
13784 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13785 return M;
13786 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13787 return M;
13788 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13789 return M;
13790 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13791 return M;
13792 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13793 return M;
13794 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13795 return M;
13796 return SDValue();
13797}
13798
13799bool
13800ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13801 CombineLevel Level) const {
13802 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13803 N->getOpcode() == ISD::SRL) &&
13804 "Expected shift op");
13805
13806 if (Level == BeforeLegalizeTypes)
13807 return true;
13808
13809 if (N->getOpcode() != ISD::SHL)
13810 return true;
13811
13812 if (Subtarget->isThumb1Only()) {
13813 // Avoid making expensive immediates by commuting shifts. (This logic
13814 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13815 // for free.)
13816 if (N->getOpcode() != ISD::SHL)
13817 return true;
13818 SDValue N1 = N->getOperand(0);
13819 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13820 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13821 return true;
13822 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13823 if (Const->getAPIntValue().ult(256))
13824 return false;
13825 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13826 Const->getAPIntValue().sgt(-256))
13827 return false;
13828 }
13829 return true;
13830 }
13831
13832 // Turn off commute-with-shift transform after legalization, so it doesn't
13833 // conflict with PerformSHLSimplify. (We could try to detect when
13834 // PerformSHLSimplify would trigger more precisely, but it isn't
13835 // really necessary.)
13836 return false;
13837}
13838
13839bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13840 const SDNode *N) const {
13841 assert(N->getOpcode() == ISD::XOR &&
13842 (N->getOperand(0).getOpcode() == ISD::SHL ||
13843 N->getOperand(0).getOpcode() == ISD::SRL) &&
13844 "Expected XOR(SHIFT) pattern");
13845
13846 // Only commute if the entire NOT mask is a hidden shifted mask.
13847 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13848 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13849 if (XorC && ShiftC) {
13850 unsigned MaskIdx, MaskLen;
13851 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13852 unsigned ShiftAmt = ShiftC->getZExtValue();
13853 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13854 if (N->getOperand(0).getOpcode() == ISD::SHL)
13855 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13856 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13857 }
13858 }
13859
13860 return false;
13861}
13862
13863bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13864 const SDNode *N, CombineLevel Level) const {
13865 assert(((N->getOpcode() == ISD::SHL &&
13866 N->getOperand(0).getOpcode() == ISD::SRL) ||
13867 (N->getOpcode() == ISD::SRL &&
13868 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13869 "Expected shift-shift mask");
13870
13871 if (!Subtarget->isThumb1Only())
13872 return true;
13873
13874 if (Level == BeforeLegalizeTypes)
13875 return true;
13876
13877 return false;
13878}
13879
13881 EVT VT) const {
13882 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13883}
13884
13886 if (!Subtarget->hasNEON()) {
13887 if (Subtarget->isThumb1Only())
13888 return VT.getScalarSizeInBits() <= 32;
13889 return true;
13890 }
13891 return VT.isScalarInteger();
13892}
13893
13895 EVT VT) const {
13896 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13897 return false;
13898
13899 switch (FPVT.getSimpleVT().SimpleTy) {
13900 case MVT::f16:
13901 return Subtarget->hasVFP2Base();
13902 case MVT::f32:
13903 return Subtarget->hasVFP2Base();
13904 case MVT::f64:
13905 return Subtarget->hasFP64();
13906 case MVT::v4f32:
13907 case MVT::v8f16:
13908 return Subtarget->hasMVEFloatOps();
13909 default:
13910 return false;
13911 }
13912}
13913
13914static SDValue PerformSHLSimplify(SDNode *N,
13915 TargetLowering::DAGCombinerInfo &DCI,
13916 const ARMSubtarget *ST) {
13917 // Allow the generic combiner to identify potential bswaps.
13918 if (DCI.isBeforeLegalize())
13919 return SDValue();
13920
13921 // DAG combiner will fold:
13922 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13923 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13924 // Other code patterns that can also be modified have the following form:
13925 // b + ((a << 1) | 510)
13926 // b + ((a << 1) & 510)
13927 // b + ((a << 1) ^ 510)
13928 // b + ((a << 1) + 510)
13929
13930 // Many instructions can perform the shift for free, but it requires both
13931 // operands to be registers. If c1 << c2 is too large, a mov immediate
13932 // instruction will be needed. So, unfold back to the original pattern if:
13933 // - c1 and c2 are small enough that they don't require mov imms, and
13934 // - the user(s) of the node can perform a shl themselves.
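// For example, (add (shl a, 1), 510) is unfolded to (shl (add a, 255), 1);
// both 255 and 510 have at most 8 significant bits, so neither needs a
// separate mov immediate, and the user of the add can fold the shl itself.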
13935
13936 // No shifted operands for 16-bit instructions.
13937 if (ST->isThumb() && ST->isThumb1Only())
13938 return SDValue();
13939
13940 // Check that all the users could perform the shl themselves.
13941 for (auto *U : N->uses()) {
13942 switch(U->getOpcode()) {
13943 default:
13944 return SDValue();
13945 case ISD::SUB:
13946 case ISD::ADD:
13947 case ISD::AND:
13948 case ISD::OR:
13949 case ISD::XOR:
13950 case ISD::SETCC:
13951 case ARMISD::CMP:
13952 // Check that the user isn't already using a constant because there
13953 // aren't any instructions that support an immediate operand and a
13954 // shifted operand.
13955 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13956 isa<ConstantSDNode>(U->getOperand(1)))
13957 return SDValue();
13958
13959 // Check that it's not already using a shift.
13960 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13961 U->getOperand(1).getOpcode() == ISD::SHL)
13962 return SDValue();
13963 break;
13964 }
13965 }
13966
13967 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13968 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13969 return SDValue();
13970
13971 if (N->getOperand(0).getOpcode() != ISD::SHL)
13972 return SDValue();
13973
13974 SDValue SHL = N->getOperand(0);
13975
13976 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13977 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13978 if (!C1ShlC2 || !C2)
13979 return SDValue();
13980
13981 APInt C2Int = C2->getAPIntValue();
13982 APInt C1Int = C1ShlC2->getAPIntValue();
13983 unsigned C2Width = C2Int.getBitWidth();
13984 if (C2Int.uge(C2Width))
13985 return SDValue();
13986 uint64_t C2Value = C2Int.getZExtValue();
13987
13988 // Check that performing a lshr will not lose any information.
13989 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13990 if ((C1Int & Mask) != C1Int)
13991 return SDValue();
13992
13993 // Shift the first constant.
13994 C1Int.lshrInPlace(C2Int);
13995
13996 // The immediates are encoded as an 8-bit value that can be rotated.
13997 auto LargeImm = [](const APInt &Imm) {
13998 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
13999 return Imm.getBitWidth() - Zeros > 8;
14000 };
14001
14002 if (LargeImm(C1Int) || LargeImm(C2Int))
14003 return SDValue();
14004
14005 SelectionDAG &DAG = DCI.DAG;
14006 SDLoc dl(N);
14007 SDValue X = SHL.getOperand(0);
14008 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14009 DAG.getConstant(C1Int, dl, MVT::i32));
14010 // Shift left to compensate for the lshr of C1Int.
14011 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
14012
14013 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14014 SHL.dump(); N->dump());
14015 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14016 return Res;
14017}
14018
14019
14020/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14021///
14022static SDValue PerformADDCombine(SDNode *N,
14023 TargetLowering::DAGCombinerInfo &DCI,
14024 const ARMSubtarget *Subtarget) {
14025 SDValue N0 = N->getOperand(0);
14026 SDValue N1 = N->getOperand(1);
14027
14028 // Only works one way, because it needs an immediate operand.
14029 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14030 return Result;
14031
14032 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14033 return Result;
14034
14035 // First try with the default operand order.
14036 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14037 return Result;
14038
14039 // If that didn't work, try again with the operands commuted.
14040 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14041}
14042
14043// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14044// providing -X is as cheap as X (currently, just a constant).
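// e.g. negating (csinc 1, y, cc): one arm becomes -1 and the other becomes
// -(y + 1) == ~y, which is exactly what (csinv -1, y, cc) computes.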
14045static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14046 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14047 return SDValue();
14048 SDValue CSINC = N->getOperand(1);
14049 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14050 return SDValue();
14051
14052 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14053 if (!X)
14054 return SDValue();
14055
14056 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14057 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14058 CSINC.getOperand(0)),
14059 CSINC.getOperand(1), CSINC.getOperand(2),
14060 CSINC.getOperand(3));
14061}
14062
14063/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14064///
14065static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14066 const ARMSubtarget *Subtarget) {
14068 SDValue N0 = N->getOperand(0);
14069 SDValue N1 = N->getOperand(1);
14070
14071 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14072 if (N1.getNode()->hasOneUse())
14073 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14074 return Result;
14075
14076 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14077 return R;
14078
14079 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14080 return SDValue();
14081
14082 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14083 // so that we can readily pattern match more mve instructions which can use
14084 // a scalar operand.
14085 SDValue VDup = N->getOperand(1);
14086 if (VDup->getOpcode() != ARMISD::VDUP)
14087 return SDValue();
14088
14089 SDValue VMov = N->getOperand(0);
14090 if (VMov->getOpcode() == ISD::BITCAST)
14091 VMov = VMov->getOperand(0);
14092
14093 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14094 return SDValue();
14095
14096 SDLoc dl(N);
14097 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14098 DCI.DAG.getConstant(0, dl, MVT::i32),
14099 VDup->getOperand(0));
14100 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14101}
14102
14103/// PerformVMULCombine
14104/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14105/// special multiplier accumulator forwarding.
14106/// vmul d3, d0, d2
14107/// vmla d3, d1, d2
14108/// is faster than
14109/// vadd d3, d0, d1
14110/// vmul d3, d3, d2
14111// However, for (A + B) * (A + B),
14112// vadd d2, d0, d1
14113// vmul d3, d0, d2
14114// vmla d3, d1, d2
14115// is slower than
14116// vadd d2, d0, d1
14117// vmul d3, d2, d2
14118static SDValue PerformVMULCombine(SDNode *N,
14119 TargetLowering::DAGCombinerInfo &DCI,
14120 const ARMSubtarget *Subtarget) {
14121 if (!Subtarget->hasVMLxForwarding())
14122 return SDValue();
14123
14124 SelectionDAG &DAG = DCI.DAG;
14125 SDValue N0 = N->getOperand(0);
14126 SDValue N1 = N->getOperand(1);
14127 unsigned Opcode = N0.getOpcode();
14128 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14129 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14130 Opcode = N1.getOpcode();
14131 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14132 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14133 return SDValue();
14134 std::swap(N0, N1);
14135 }
14136
14137 if (N0 == N1)
14138 return SDValue();
14139
14140 EVT VT = N->getValueType(0);
14141 SDLoc DL(N);
14142 SDValue N00 = N0->getOperand(0);
14143 SDValue N01 = N0->getOperand(1);
14144 return DAG.getNode(Opcode, DL, VT,
14145 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14146 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14147}
14148
14149static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14150 const ARMSubtarget *Subtarget) {
14151 EVT VT = N->getValueType(0);
14152 if (VT != MVT::v2i64)
14153 return SDValue();
14154
14155 SDValue N0 = N->getOperand(0);
14156 SDValue N1 = N->getOperand(1);
14157
14158 auto IsSignExt = [&](SDValue Op) {
14159 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14160 return SDValue();
14161 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14162 if (VT.getScalarSizeInBits() == 32)
14163 return Op->getOperand(0);
14164 return SDValue();
14165 };
14166 auto IsZeroExt = [&](SDValue Op) {
14167 // Zero extends are a little more awkward. At the point we are matching
14168 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14169 // That might be before or after a bitcast depending on how the and is
14170 // placed. Because this has to look through bitcasts, it is currently only
14171 // supported on LE.
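// e.g. on a little-endian target the v4i32 mask (-1, 0, -1, 0) viewed as
// v2i64 is 0x00000000ffffffff per lane, i.e. it zero-extends each low 32 bits.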
14172 if (!Subtarget->isLittle())
14173 return SDValue();
14174
14175 SDValue And = Op;
14176 if (And->getOpcode() == ISD::BITCAST)
14177 And = And->getOperand(0);
14178 if (And->getOpcode() != ISD::AND)
14179 return SDValue();
14180 SDValue Mask = And->getOperand(1);
14181 if (Mask->getOpcode() == ISD::BITCAST)
14182 Mask = Mask->getOperand(0);
14183
14184 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14185 Mask.getValueType() != MVT::v4i32)
14186 return SDValue();
14187 if (isAllOnesConstant(Mask->getOperand(0)) &&
14188 isNullConstant(Mask->getOperand(1)) &&
14189 isAllOnesConstant(Mask->getOperand(2)) &&
14190 isNullConstant(Mask->getOperand(3)))
14191 return And->getOperand(0);
14192 return SDValue();
14193 };
14194
14195 SDLoc dl(N);
14196 if (SDValue Op0 = IsSignExt(N0)) {
14197 if (SDValue Op1 = IsSignExt(N1)) {
14198 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14199 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14200 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14201 }
14202 }
14203 if (SDValue Op0 = IsZeroExt(N0)) {
14204 if (SDValue Op1 = IsZeroExt(N1)) {
14205 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14206 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14207 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14208 }
14209 }
14210
14211 return SDValue();
14212}
14213
14214static SDValue PerformMULCombine(SDNode *N,
14215 TargetLowering::DAGCombinerInfo &DCI,
14216 const ARMSubtarget *Subtarget) {
14217 SelectionDAG &DAG = DCI.DAG;
14218
14219 EVT VT = N->getValueType(0);
14220 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14221 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14222
14223 if (Subtarget->isThumb1Only())
14224 return SDValue();
14225
14226 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14227 return SDValue();
14228
14229 if (VT.is64BitVector() || VT.is128BitVector())
14230 return PerformVMULCombine(N, DCI, Subtarget);
14231 if (VT != MVT::i32)
14232 return SDValue();
14233
14234 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14235 if (!C)
14236 return SDValue();
14237
14238 int64_t MulAmt = C->getSExtValue();
14239 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14240
14241 ShiftAmt = ShiftAmt & (32 - 1);
14242 SDValue V = N->getOperand(0);
14243 SDLoc DL(N);
14244
14245 SDValue Res;
14246 MulAmt >>= ShiftAmt;
14247
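// The remaining odd factor is matched against 2^N +/- 1 below; e.g. x * 10
// strips the trailing zero (10 == 5 << 1), matches 5 == 2^2 + 1, and emits
// ((x << 2) + x) << 1.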
14248 if (MulAmt >= 0) {
14249 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14250 // (mul x, 2^N + 1) => (add (shl x, N), x)
14251 Res = DAG.getNode(ISD::ADD, DL, VT,
14252 V,
14253 DAG.getNode(ISD::SHL, DL, VT,
14254 V,
14255 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14256 MVT::i32)));
14257 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14258 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14259 Res = DAG.getNode(ISD::SUB, DL, VT,
14260 DAG.getNode(ISD::SHL, DL, VT,
14261 V,
14262 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14263 MVT::i32)),
14264 V);
14265 } else
14266 return SDValue();
14267 } else {
14268 uint64_t MulAmtAbs = -MulAmt;
14269 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14270 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14271 Res = DAG.getNode(ISD::SUB, DL, VT,
14272 V,
14273 DAG.getNode(ISD::SHL, DL, VT,
14274 V,
14275 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14276 MVT::i32)));
14277 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14278 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14279 Res = DAG.getNode(ISD::ADD, DL, VT,
14280 V,
14281 DAG.getNode(ISD::SHL, DL, VT,
14282 V,
14283 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14284 MVT::i32)));
14285 Res = DAG.getNode(ISD::SUB, DL, VT,
14286 DAG.getConstant(0, DL, MVT::i32), Res);
14287 } else
14288 return SDValue();
14289 }
14290
14291 if (ShiftAmt != 0)
14292 Res = DAG.getNode(ISD::SHL, DL, VT,
14293 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14294
14295 // Do not add new nodes to DAG combiner worklist.
14296 DCI.CombineTo(N, Res, false);
14297 return SDValue();
14298}
14299
14300static SDValue CombineANDShift(SDNode *N,
14301 TargetLowering::DAGCombinerInfo &DCI,
14302 const ARMSubtarget *Subtarget) {
14303 // Allow DAGCombine to pattern-match before we touch the canonical form.
14304 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14305 return SDValue();
14306
14307 if (N->getValueType(0) != MVT::i32)
14308 return SDValue();
14309
14310 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14311 if (!N1C)
14312 return SDValue();
14313
14314 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14315 // Don't transform uxtb/uxth.
14316 if (C1 == 255 || C1 == 65535)
14317 return SDValue();
14318
14319 SDNode *N0 = N->getOperand(0).getNode();
14320 if (!N0->hasOneUse())
14321 return SDValue();
14322
14323 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14324 return SDValue();
14325
14326 bool LeftShift = N0->getOpcode() == ISD::SHL;
14327
14328 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14329 if (!N01C)
14330 return SDValue();
14331
14332 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14333 if (!C2 || C2 >= 32)
14334 return SDValue();
14335
14336 // Clear irrelevant bits in the mask.
14337 if (LeftShift)
14338 C1 &= (-1U << C2);
14339 else
14340 C1 &= (-1U >> C2);
14341
14342 SelectionDAG &DAG = DCI.DAG;
14343 SDLoc DL(N);
14344
14345 // We have a pattern of the form "(and (shl x, c2) c1)" or
14346 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14347 // transform to a pair of shifts, to save materializing c1.
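// e.g. (and (srl x, 3), 31): here C2 == 3 and C3 == countl_zero(31) == 27,
// so this becomes (srl (shl x, 24), 27), extracting bits [7:3] of x without
// materializing the mask.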
14348
14349 // First pattern: right shift, then mask off leading bits.
14350 // FIXME: Use demanded bits?
14351 if (!LeftShift && isMask_32(C1)) {
14352 uint32_t C3 = llvm::countl_zero(C1);
14353 if (C2 < C3) {
14354 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14355 DAG.getConstant(C3 - C2, DL, MVT::i32));
14356 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14357 DAG.getConstant(C3, DL, MVT::i32));
14358 }
14359 }
14360
14361 // First pattern, reversed: left shift, then mask off trailing bits.
14362 if (LeftShift && isMask_32(~C1)) {
14363 uint32_t C3 = llvm::countr_zero(C1);
14364 if (C2 < C3) {
14365 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14366 DAG.getConstant(C3 - C2, DL, MVT::i32));
14367 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14368 DAG.getConstant(C3, DL, MVT::i32));
14369 }
14370 }
14371
14372 // Second pattern: left shift, then mask off leading bits.
14373 // FIXME: Use demanded bits?
14374 if (LeftShift && isShiftedMask_32(C1)) {
14375 uint32_t Trailing = llvm::countr_zero(C1);
14376 uint32_t C3 = llvm::countl_zero(C1);
14377 if (Trailing == C2 && C2 + C3 < 32) {
14378 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14379 DAG.getConstant(C2 + C3, DL, MVT::i32));
14380 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14381 DAG.getConstant(C3, DL, MVT::i32));
14382 }
14383 }
14384
14385 // Second pattern, reversed: right shift, then mask off trailing bits.
14386 // FIXME: Handle other patterns of known/demanded bits.
14387 if (!LeftShift && isShiftedMask_32(C1)) {
14388 uint32_t Leading = llvm::countl_zero(C1);
14389 uint32_t C3 = llvm::countr_zero(C1);
14390 if (Leading == C2 && C2 + C3 < 32) {
14391 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14392 DAG.getConstant(C2 + C3, DL, MVT::i32));
14393 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14394 DAG.getConstant(C3, DL, MVT::i32));
14395 }
14396 }
14397
14398 // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
14399 // if "c1 >> c2" is a cheaper immediate than "c1"
14400 if (LeftShift &&
14401 HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {
14402
14403 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14404 DAG.getConstant(C1 >> C2, DL, MVT::i32));
14405 return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
14406 DAG.getConstant(C2, DL, MVT::i32));
14407 }
14408
14409 return SDValue();
14410}
14411
14412static SDValue PerformANDCombine(SDNode *N,
14413 TargetLowering::DAGCombinerInfo &DCI,
14414 const ARMSubtarget *Subtarget) {
14415 // Attempt to use immediate-form VBIC
14416 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14417 SDLoc dl(N);
14418 EVT VT = N->getValueType(0);
14419 SelectionDAG &DAG = DCI.DAG;
14420
14421 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14422 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14423 return SDValue();
14424
14425 APInt SplatBits, SplatUndef;
14426 unsigned SplatBitSize;
14427 bool HasAnyUndefs;
14428 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14429 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14430 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14431 SplatBitSize == 64) {
14432 EVT VbicVT;
14433 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14434 SplatUndef.getZExtValue(), SplatBitSize,
14435 DAG, dl, VbicVT, VT, OtherModImm);
14436 if (Val.getNode()) {
14437 SDValue Input =
14438 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14439 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14440 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14441 }
14442 }
14443 }
14444
14445 if (!Subtarget->isThumb1Only()) {
14446 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14447 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14448 return Result;
14449
14450 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14451 return Result;
14452 }
14453
14454 if (Subtarget->isThumb1Only())
14455 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14456 return Result;
14457
14458 return SDValue();
14459}
14460
14461// Try combining OR nodes to SMULWB, SMULWT.
14462static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14463 TargetLowering::DAGCombinerInfo &DCI,
14464 const ARMSubtarget *Subtarget) {
14465 if (!Subtarget->hasV6Ops() ||
14466 (Subtarget->isThumb() &&
14467 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14468 return SDValue();
14469
14470 SDValue SRL = OR->getOperand(0);
14471 SDValue SHL = OR->getOperand(1);
14472
14473 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14474 SRL = OR->getOperand(1);
14475 SHL = OR->getOperand(0);
14476 }
14477 if (!isSRL16(SRL) || !isSHL16(SHL))
14478 return SDValue();
14479
14480 // The first operands to the shifts need to be the two results from the
14481 // same smul_lohi node.
14482 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14483 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14484 return SDValue();
14485
14486 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14487 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14488 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14489 return SDValue();
14490
14491 // Now we have:
14492 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14493 // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14494 // For SMULWB the 16-bit value will be sign extended somehow.
14495 // For SMULWT only the SRA is required.
14496 // Check both sides of SMUL_LOHI
14497 SDValue OpS16 = SMULLOHI->getOperand(0);
14498 SDValue OpS32 = SMULLOHI->getOperand(1);
14499
14500 SelectionDAG &DAG = DCI.DAG;
14501 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14502 OpS16 = OpS32;
14503 OpS32 = SMULLOHI->getOperand(0);
14504 }
14505
14506 SDLoc dl(OR);
14507 unsigned Opcode = 0;
14508 if (isS16(OpS16, DAG))
14509 Opcode = ARMISD::SMULWB;
14510 else if (isSRA16(OpS16)) {
14511 Opcode = ARMISD::SMULWT;
14512 OpS16 = OpS16->getOperand(0);
14513 }
14514 else
14515 return SDValue();
14516
14517 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14518 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14519 return SDValue(OR, 0);
14520}
14521
14522static SDValue PerformORCombineToBFI(SDNode *N,
14523 TargetLowering::DAGCombinerInfo &DCI,
14524 const ARMSubtarget *Subtarget) {
14525 // BFI is only available on V6T2+
14526 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14527 return SDValue();
14528
14529 EVT VT = N->getValueType(0);
14530 SDValue N0 = N->getOperand(0);
14531 SDValue N1 = N->getOperand(1);
14532 SelectionDAG &DAG = DCI.DAG;
14533 SDLoc DL(N);
14534 // 1) or (and A, mask), val => ARMbfi A, val, mask
14535 // iff (val & mask) == val
14536 //
14537 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14538 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14539 // && mask == ~mask2
14540 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14541 // && ~mask == mask2
14542 // (i.e., copy a bitfield value into another bitfield of the same width)
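// e.g. case (1): (or (and A, 0xffffff00), 0x2a) becomes
// (ARMbfi A, 0x2a, 0xffffff00), inserting the 8-bit value into bits [7:0] of A.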
14543
14544 if (VT != MVT::i32)
14545 return SDValue();
14546
14547 SDValue N00 = N0.getOperand(0);
14548
14549 // The value and the mask need to be constants so we can verify this is
14550 // actually a bitfield set. If the mask is 0xffff, we can do better
14551 // via a movt instruction, so don't use BFI in that case.
14552 SDValue MaskOp = N0.getOperand(1);
14553 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14554 if (!MaskC)
14555 return SDValue();
14556 unsigned Mask = MaskC->getZExtValue();
14557 if (Mask == 0xffff)
14558 return SDValue();
14559 SDValue Res;
14560 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14561 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14562 if (N1C) {
14563 unsigned Val = N1C->getZExtValue();
14564 if ((Val & ~Mask) != Val)
14565 return SDValue();
14566
14567 if (ARM::isBitFieldInvertedMask(Mask)) {
14568 Val >>= llvm::countr_zero(~Mask);
14569
14570 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14571 DAG.getConstant(Val, DL, MVT::i32),
14572 DAG.getConstant(Mask, DL, MVT::i32));
14573
14574 DCI.CombineTo(N, Res, false);
14575 // Return value from the original node to inform the combiner that N is
14576 // now dead.
14577 return SDValue(N, 0);
14578 }
14579 } else if (N1.getOpcode() == ISD::AND) {
14580 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14581 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14582 if (!N11C)
14583 return SDValue();
14584 unsigned Mask2 = N11C->getZExtValue();
14585
14586 // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI
14587 // pattern to match as-is.
14588 if (ARM::isBitFieldInvertedMask(Mask) &&
14589 (Mask == ~Mask2)) {
14590 // The pack halfword instruction works better for masks that fit it,
14591 // so use that when it's available.
14592 if (Subtarget->hasDSP() &&
14593 (Mask == 0xffff || Mask == 0xffff0000))
14594 return SDValue();
14595 // 2a
14596 unsigned amt = llvm::countr_zero(Mask2);
14597 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14598 DAG.getConstant(amt, DL, MVT::i32));
14599 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14600 DAG.getConstant(Mask, DL, MVT::i32));
14601 DCI.CombineTo(N, Res, false);
14602 // Return value from the original node to inform the combiner that N is
14603 // now dead.
14604 return SDValue(N, 0);
14605 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14606 (~Mask == Mask2)) {
14607 // The pack halfword instruction works better for masks that fit it,
14608 // so use that when it's available.
14609 if (Subtarget->hasDSP() &&
14610 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14611 return SDValue();
14612 // 2b
14613 unsigned lsb = llvm::countr_zero(Mask);
14614 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14615 DAG.getConstant(lsb, DL, MVT::i32));
14616 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14617 DAG.getConstant(Mask2, DL, MVT::i32));
14618 DCI.CombineTo(N, Res, false);
14619 // Return value from the original node to inform the combiner that N is
14620 // now dead.
14621 return SDValue(N, 0);
14622 }
14623 }
14624
14625 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14626 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14628 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14629 // where lsb(mask) == #shamt and masked bits of B are known zero.
14630 SDValue ShAmt = N00.getOperand(1);
14631 unsigned ShAmtC = ShAmt->getAsZExtVal();
14632 unsigned LSB = llvm::countr_zero(Mask);
14633 if (ShAmtC != LSB)
14634 return SDValue();
14635
14636 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14637 DAG.getConstant(~Mask, DL, MVT::i32));
14638
14639 DCI.CombineTo(N, Res, false);
14640 // Return value from the original node to inform the combiner that N is
14641 // now dead.
14642 return SDValue(N, 0);
14643 }
14644
14645 return SDValue();
14646}
14647
14648static bool isValidMVECond(unsigned CC, bool IsFloat) {
14649 switch (CC) {
14650 case ARMCC::EQ:
14651 case ARMCC::NE:
14652 case ARMCC::LE:
14653 case ARMCC::GT:
14654 case ARMCC::GE:
14655 case ARMCC::LT:
14656 return true;
14657 case ARMCC::HS:
14658 case ARMCC::HI:
14659 return !IsFloat;
14660 default:
14661 return false;
14662 };
14663}
14664
14666 if (N->getOpcode() == ARMISD::VCMP)
14667 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14668 else if (N->getOpcode() == ARMISD::VCMPZ)
14669 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14670 else
14671 llvm_unreachable("Not a VCMP/VCMPZ!");
14672}
14673
14676 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14677}
14678
14680 const ARMSubtarget *Subtarget) {
14681 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14682 // together with predicates
14683 EVT VT = N->getValueType(0);
14684 SDLoc DL(N);
14685 SDValue N0 = N->getOperand(0);
14686 SDValue N1 = N->getOperand(1);
14687
14688 auto IsFreelyInvertable = [&](SDValue V) {
14689 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14690 return CanInvertMVEVCMP(V);
14691 return false;
14692 };
14693
14694 // At least one operand must be freely invertable.
14695 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14696 return SDValue();
14697
14698 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14699 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14700 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14701 return DAG.getLogicalNOT(DL, And, VT);
14702}
14703
14704/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14705 static SDValue PerformORCombine(SDNode *N,
14706 TargetLowering::DAGCombinerInfo &DCI,
14707 const ARMSubtarget *Subtarget) {
14708 // Attempt to use immediate-form VORR
14709 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14710 SDLoc dl(N);
14711 EVT VT = N->getValueType(0);
14712 SelectionDAG &DAG = DCI.DAG;
14713
14714 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14715 return SDValue();
14716
14717 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14718 VT == MVT::v8i1 || VT == MVT::v16i1))
14719 return PerformORCombine_i1(N, DAG, Subtarget);
14720
14721 APInt SplatBits, SplatUndef;
14722 unsigned SplatBitSize;
14723 bool HasAnyUndefs;
14724 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14725 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14726 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14727 SplatBitSize == 64) {
14728 EVT VorrVT;
14729 SDValue Val =
14730 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14731 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14732 if (Val.getNode()) {
14733 SDValue Input =
14734 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14735 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14736 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14737 }
14738 }
14739 }
14740
14741 if (!Subtarget->isThumb1Only()) {
14742 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14743 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14744 return Result;
14745 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14746 return Result;
14747 }
14748
14749 SDValue N0 = N->getOperand(0);
14750 SDValue N1 = N->getOperand(1);
14751
14752 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
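// For example, with a splat constant A = <0x00ff00ff, ...> this picks the
// bytes of B where A is all-ones and the bytes of C where A is zero, which is
// exactly the bit-select performed by VBSP/VBSL with A as the mask (sketch).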
14753 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14754 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
14755
14756 // The code below optimizes (or (and X, Y), Z).
14757 // The AND operand needs to have a single user to make these optimizations
14758 // profitable.
14759 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14760 return SDValue();
14761
14762 APInt SplatUndef;
14763 unsigned SplatBitSize;
14764 bool HasAnyUndefs;
14765
14766 APInt SplatBits0, SplatBits1;
14767 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14768 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14769 // Ensure that the second operand of each AND is a constant splat.
14770 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14771 HasAnyUndefs) && !HasAnyUndefs) {
14772 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14773 HasAnyUndefs) && !HasAnyUndefs) {
14774 // Ensure that the bit widths of the constants are the same and that
14775 // the splat arguments are logical inverses, as per the pattern we
14776 // are trying to simplify.
14777 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14778 SplatBits0 == ~SplatBits1) {
14779 // Canonicalize the vector type to make instruction selection
14780 // simpler.
14781 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14782 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14783 N0->getOperand(1),
14784 N0->getOperand(0),
14785 N1->getOperand(0));
14786 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14787 }
14788 }
14789 }
14790 }
14791
14792 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14793 // reasonable.
14794 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14795 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14796 return Res;
14797 }
14798
14799 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14800 return Result;
14801
14802 return SDValue();
14803}
14804
14805 static SDValue PerformXORCombine(SDNode *N,
14806 TargetLowering::DAGCombinerInfo &DCI,
14807 const ARMSubtarget *Subtarget) {
14808 EVT VT = N->getValueType(0);
14809 SelectionDAG &DAG = DCI.DAG;
14810
14811 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14812 return SDValue();
14813
14814 if (!Subtarget->isThumb1Only()) {
14815 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14816 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14817 return Result;
14818
14819 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14820 return Result;
14821 }
14822
14823 if (Subtarget->hasMVEIntegerOps()) {
14824 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14825 SDValue N0 = N->getOperand(0);
14826 SDValue N1 = N->getOperand(1);
14827 const TargetLowering *TLI = Subtarget->getTargetLowering();
14828 if (TLI->isConstTrueVal(N1) &&
14829 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14830 if (CanInvertMVEVCMP(N0)) {
14831 SDLoc DL(N0);
14832 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14833 
14834 SmallVector<SDValue, 4> Ops;
14835 Ops.push_back(N0->getOperand(0));
14836 if (N0->getOpcode() == ARMISD::VCMP)
14837 Ops.push_back(N0->getOperand(1));
14838 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14839 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14840 }
14841 }
14842 }
14843
14844 return SDValue();
14845}
14846
14847// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14848// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14849// their position in "to" (Rd).
14850static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14851 assert(N->getOpcode() == ARMISD::BFI);
14852
14853 SDValue From = N->getOperand(1);
14854 ToMask = ~N->getConstantOperandAPInt(2);
14855 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14856
14857 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14858 // #C in the base of the SHR.
14859 if (From->getOpcode() == ISD::SRL &&
14860 isa<ConstantSDNode>(From->getOperand(1))) {
14861 APInt Shift = From->getConstantOperandAPInt(1);
14862 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14863 FromMask <<= Shift.getLimitedValue(31);
14864 From = From->getOperand(0);
14865 }
14866
14867 return From;
14868}
14869
14870 // If A and B each contain one contiguous run of set bits, does A | B equal
14871 // the concatenation A . B (the runs abut, with no gap or overlap)?
14872 // Neither A nor B may be zero.
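// For example, A = 0b111000 and B = 0b000110 concatenate properly (A | B is
// the single run 0b111110), whereas A = 0b110000 and B = 0b000110 leave a gap
// at bit 3 and do not.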
14873static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14874 unsigned LastActiveBitInA = A.countr_zero();
14875 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14876 return LastActiveBitInA - 1 == FirstActiveBitInB;
14877}
14878
14879 static SDValue FindBFIToCombineWith(SDNode *N) {
14880 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14881 APInt ToMask, FromMask;
14882 SDValue From = ParseBFI(N, ToMask, FromMask);
14883 SDValue To = N->getOperand(0);
14884
14885 SDValue V = To;
14886 if (V.getOpcode() != ARMISD::BFI)
14887 return SDValue();
14888
14889 APInt NewToMask, NewFromMask;
14890 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14891 if (NewFrom != From)
14892 return SDValue();
14893
14894 // Do the written bits conflict with any we've seen so far?
14895 if ((NewToMask & ToMask).getBoolValue())
14896 // Conflicting bits.
14897 return SDValue();
14898
14899 // Are the new bits contiguous when combined with the old bits?
14900 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14901 BitsProperlyConcatenate(FromMask, NewFromMask))
14902 return V;
14903 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14904 BitsProperlyConcatenate(NewFromMask, FromMask))
14905 return V;
14906
14907 return SDValue();
14908}
14909
14910 static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
14911 SDValue N0 = N->getOperand(0);
14912 SDValue N1 = N->getOperand(1);
14913
14914 if (N1.getOpcode() == ISD::AND) {
14915 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14916 // the bits being cleared by the AND are not demanded by the BFI.
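// e.g. (bfi A, (and B, 0xff), 0xffffff00) only inserts the low 8 bits of B,
// so the (and B, 0xff) is redundant and B can be used directly (illustration).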
14917 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14918 if (!N11C)
14919 return SDValue();
14920 unsigned InvMask = N->getConstantOperandVal(2);
14921 unsigned LSB = llvm::countr_zero(~InvMask);
14922 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14923 assert(Width <
14924 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14925 "undefined behavior");
14926 unsigned Mask = (1u << Width) - 1;
14927 unsigned Mask2 = N11C->getZExtValue();
14928 if ((Mask & (~Mask2)) == 0)
14929 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14930 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14931 return SDValue();
14932 }
14933
14934 // Look for another BFI to combine with.
14935 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14936 // We've found a BFI.
14937 APInt ToMask1, FromMask1;
14938 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14939
14940 APInt ToMask2, FromMask2;
14941 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14942 assert(From1 == From2);
14943 (void)From2;
14944
14945 // Create a new BFI, combining the two together.
14946 APInt NewFromMask = FromMask1 | FromMask2;
14947 APInt NewToMask = ToMask1 | ToMask2;
14948
14949 EVT VT = N->getValueType(0);
14950 SDLoc dl(N);
14951
14952 if (NewFromMask[0] == 0)
14953 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
14954 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
14955 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14956 DAG.getConstant(~NewToMask, dl, VT));
14957 }
14958
14959 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14960 // that lower bit insertions are performed first, provided that M1 and M2
14961 // do not overlap. This can allow multiple BFI instructions to be combined
14962 // together by the other folds above.
14963 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14964 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14965 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14966
14967 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14968 ToMask1.countl_zero() < ToMask2.countl_zero())
14969 return SDValue();
14970
14971 EVT VT = N->getValueType(0);
14972 SDLoc dl(N);
14973 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14974 N->getOperand(1), N->getOperand(2));
14975 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14976 N0.getOperand(2));
14977 }
14978
14979 return SDValue();
14980}
14981
14982// Check that N is CMPZ(CSINC(0, 0, CC, X)),
14983// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
14984// return X if valid.
14985 static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
14986 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14987 return SDValue();
14988 SDValue CSInc = Cmp->getOperand(0);
14989
14990 // Ignore any `And 1` nodes that may not yet have been removed. We are
14991 // looking for a value that produces 1/0, so these have no effect on the
14992 // code.
14993 while (CSInc.getOpcode() == ISD::AND &&
14994 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
14995 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
14996 CSInc = CSInc.getOperand(0);
14997
14998 if (CSInc.getOpcode() == ARMISD::CSINC &&
14999 isNullConstant(CSInc.getOperand(0)) &&
15000 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15001 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
15002 return CSInc.getOperand(3);
15003 }
15004 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
15005 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15006 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
15007 return CSInc.getOperand(4);
15008 }
15009 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
15010 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15011 CC = ARMCC::getOppositeCondition(
15012 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
15013 return CSInc.getOperand(4);
15014 }
15015 return SDValue();
15016}
15017
15018 static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
15019 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
15020 // t92: glue = ARMISD::CMPZ t74, 0
15021 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15022 // t96: glue = ARMISD::CMPZ t93, 0
15023 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15024 ARMCC::CondCodes Cond;
15025 if (SDValue C = IsCMPZCSINC(N, Cond))
15026 if (Cond == ARMCC::EQ)
15027 return C;
15028 return SDValue();
15029}
15030
15031 static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
15032 // Fold away an unnecessary CMPZ/CSINC
15033 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15034 // if C1==EQ -> CSXYZ A, B, C2, D
15035 // if C1==NE -> CSXYZ A, B, NOT(C2), D
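// For example (illustrative): (CSEL A, B, EQ, (CMPZ (CSINC 0, 0, NE, D), 0))
// becomes (CSEL A, B, NE, D).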
15036 ARMCC::CondCodes Cond;
15037 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15038 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15039 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15040 N->getOperand(1),
15041 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15042 if (N->getConstantOperandVal(2) == ARMCC::NE)
15043 return DAG.getNode(
15044 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15045 N->getOperand(1),
15046 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
15047 }
15048 return SDValue();
15049}
15050
15051/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15052/// ARMISD::VMOVRRD.
15053 static SDValue PerformVMOVRRDCombine(SDNode *N,
15054 TargetLowering::DAGCombinerInfo &DCI,
15055 const ARMSubtarget *Subtarget) {
15056 // vmovrrd(vmovdrr x, y) -> x,y
15057 SDValue InDouble = N->getOperand(0);
15058 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15059 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15060
15061 // vmovrrd(load f64) -> (load i32), (load i32)
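// Roughly: a single f64 load from a frame index feeding the VMOVRRD is split
// into two i32 loads at offsets 0 and 4, so each half lands directly in a GPR;
// on big-endian targets the two results are swapped (see below).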
15062 SDNode *InNode = InDouble.getNode();
15063 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15064 InNode->getValueType(0) == MVT::f64 &&
15065 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15066 !cast<LoadSDNode>(InNode)->isVolatile()) {
15067 // TODO: Should this be done for non-FrameIndex operands?
15068 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15069
15070 SelectionDAG &DAG = DCI.DAG;
15071 SDLoc DL(LD);
15072 SDValue BasePtr = LD->getBasePtr();
15073 SDValue NewLD1 =
15074 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15075 LD->getAlign(), LD->getMemOperand()->getFlags());
15076
15077 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15078 DAG.getConstant(4, DL, MVT::i32));
15079
15080 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15081 LD->getPointerInfo().getWithOffset(4),
15082 commonAlignment(LD->getAlign(), 4),
15083 LD->getMemOperand()->getFlags());
15084
15085 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15086 if (DCI.DAG.getDataLayout().isBigEndian())
15087 std::swap (NewLD1, NewLD2);
15088 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15089 return Result;
15090 }
15091
15092 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15093 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15094 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15095 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15096 SDValue BV = InDouble.getOperand(0);
15097 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15098 // change lane order under big endian.
15099 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15100 while (
15101 (BV.getOpcode() == ISD::BITCAST ||
15102 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15103 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15104 BVSwap = BV.getOpcode() == ISD::BITCAST;
15105 BV = BV.getOperand(0);
15106 }
15107 if (BV.getValueType() != MVT::v4i32)
15108 return SDValue();
15109
15110 // Handle buildvectors, pulling out the correct lane depending on
15111 // endianness.
15112 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15113 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15114 SDValue Op0 = BV.getOperand(Offset);
15115 SDValue Op1 = BV.getOperand(Offset + 1);
15116 if (!Subtarget->isLittle() && BVSwap)
15117 std::swap(Op0, Op1);
15118
15119 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15120 }
15121
15122 // A chain of insert_vectors, grabbing the correct value of the chain of
15123 // inserts.
15124 SDValue Op0, Op1;
15125 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15126 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15127 if (BV.getConstantOperandVal(2) == Offset)
15128 Op0 = BV.getOperand(1);
15129 if (BV.getConstantOperandVal(2) == Offset + 1)
15130 Op1 = BV.getOperand(1);
15131 }
15132 BV = BV.getOperand(0);
15133 }
15134 if (!Subtarget->isLittle() && BVSwap)
15135 std::swap(Op0, Op1);
15136 if (Op0 && Op1)
15137 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15138 }
15139
15140 return SDValue();
15141}
15142
15143/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15144/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15145 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
15146 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15147 SDValue Op0 = N->getOperand(0);
15148 SDValue Op1 = N->getOperand(1);
15149 if (Op0.getOpcode() == ISD::BITCAST)
15150 Op0 = Op0.getOperand(0);
15151 if (Op1.getOpcode() == ISD::BITCAST)
15152 Op1 = Op1.getOperand(0);
15153 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15154 Op0.getNode() == Op1.getNode() &&
15155 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15156 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15157 N->getValueType(0), Op0.getOperand(0));
15158 return SDValue();
15159}
15160
15161 static SDValue PerformVMOVhrCombine(SDNode *N,
15162 TargetLowering::DAGCombinerInfo &DCI) {
15163 SDValue Op0 = N->getOperand(0);
15164
15165 // VMOVhr (VMOVrh (X)) -> X
15166 if (Op0->getOpcode() == ARMISD::VMOVrh)
15167 return Op0->getOperand(0);
15168
15169 // FullFP16: half values are passed in S-registers, and we don't
15170 // need any of the bitcast and moves:
15171 //
15172 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15173 // t5: i32 = bitcast t2
15174 // t18: f16 = ARMISD::VMOVhr t5
15175 // =>
15176 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15177 if (Op0->getOpcode() == ISD::BITCAST) {
15178 SDValue Copy = Op0->getOperand(0);
15179 if (Copy.getValueType() == MVT::f32 &&
15180 Copy->getOpcode() == ISD::CopyFromReg) {
15181 bool HasGlue = Copy->getNumOperands() == 3;
15182 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15183 HasGlue ? Copy->getOperand(2) : SDValue()};
15184 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15185 SDValue NewCopy =
15186 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(Copy),
15187 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15188 ArrayRef(Ops, HasGlue ? 3 : 2));
15189
15190 // Update Users, Chains, and Potential Glue.
15191 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15192 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15193 if (HasGlue)
15194 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15195 NewCopy.getValue(2));
15196
15197 return NewCopy;
15198 }
15199 }
15200
15201 // fold (VMOVhr (load x)) -> (load (f16*)x)
15202 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15203 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15204 LN0->getMemoryVT() == MVT::i16) {
15205 SDValue Load =
15206 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15207 LN0->getBasePtr(), LN0->getMemOperand());
15208 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15209 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15210 return Load;
15211 }
15212 }
15213
15214 // Only the bottom 16 bits of the source register are used.
15215 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15216 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15217 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15218 return SDValue(N, 0);
15219
15220 return SDValue();
15221}
15222
15223 static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {
15224 SDValue N0 = N->getOperand(0);
15225 EVT VT = N->getValueType(0);
15226
15227 // fold (VMOVrh (fpconst x)) -> const x
15228 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15229 APFloat V = C->getValueAPF();
15230 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15231 }
15232
15233 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15234 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15235 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15236
15237 SDValue Load =
15238 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15239 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15240 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15241 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15242 return Load;
15243 }
15244
15245 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15246 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15247 isa<ConstantSDNode>(N0->getOperand(1)))
15248 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15249 N0->getOperand(1));
15250
15251 return SDValue();
15252}
15253
15254/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15255/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15256/// i64 vector to have f64 elements, since the value can then be loaded
15257/// directly into a VFP register.
15258 static bool hasNormalLoadOperand(SDNode *N) {
15259 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15260 for (unsigned i = 0; i < NumElts; ++i) {
15261 SDNode *Elt = N->getOperand(i).getNode();
15262 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15263 return true;
15264 }
15265 return false;
15266}
15267
15268/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15269/// ISD::BUILD_VECTOR.
15270 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15271 TargetLowering::DAGCombinerInfo &DCI,
15272 const ARMSubtarget *Subtarget) {
15273 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15274 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15275 // into a pair of GPRs, which is fine when the value is used as a scalar,
15276 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15277 SelectionDAG &DAG = DCI.DAG;
15278 if (N->getNumOperands() == 2)
15279 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15280 return RV;
15281
15282 // Load i64 elements as f64 values so that type legalization does not split
15283 // them up into i32 values.
15284 EVT VT = N->getValueType(0);
15285 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15286 return SDValue();
15287 SDLoc dl(N);
15288 SmallVector<SDValue, 8> Ops;
15289 unsigned NumElts = VT.getVectorNumElements();
15290 for (unsigned i = 0; i < NumElts; ++i) {
15291 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15292 Ops.push_back(V);
15293 // Make the DAGCombiner fold the bitcast.
15294 DCI.AddToWorklist(V.getNode());
15295 }
15296 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15297 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15298 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15299}
15300
15301/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15302static SDValue
15303 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15304 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15305 // At that time, we may have inserted bitcasts from integer to float.
15306 // If these bitcasts have survived DAGCombine, change the lowering of this
15307 // BUILD_VECTOR in something more vector friendly, i.e., that does not
15308 // force to use floating point types.
15309
15310 // Make sure we can change the type of the vector.
15311 // This is possible iff:
15312 // 1. The vector is only used in a bitcast to an integer type. I.e.,
15313 // 1.1. Vector is used only once.
15314 // 1.2. Use is a bit convert to an integer type.
15315 // 2. The size of its operands is 32 bits (64 bits are not legal).
15316 EVT VT = N->getValueType(0);
15317 EVT EltVT = VT.getVectorElementType();
15318
15319 // Check 1.1. and 2.
15320 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15321 return SDValue();
15322
15323 // By construction, the input type must be float.
15324 assert(EltVT == MVT::f32 && "Unexpected type!");
15325
15326 // Check 1.2.
15327 SDNode *Use = *N->use_begin();
15328 if (Use->getOpcode() != ISD::BITCAST ||
15329 Use->getValueType(0).isFloatingPoint())
15330 return SDValue();
15331
15332 // Check profitability.
15333 // Model is, if more than half of the relevant operands are bitcast from
15334 // i32, turn the build_vector into a sequence of insert_vector_elt.
15335 // Relevant operands are everything that is not statically
15336 // (i.e., at compile time) bitcasted.
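// Worked example: in a 4-element ARMISD::BUILD_VECTOR with one constant
// operand there are 3 relevant operands, so the rewrite below only fires when
// at least 2 of them are bitcasts from i32.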
15337 unsigned NumOfBitCastedElts = 0;
15338 unsigned NumElts = VT.getVectorNumElements();
15339 unsigned NumOfRelevantElts = NumElts;
15340 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15341 SDValue Elt = N->getOperand(Idx);
15342 if (Elt->getOpcode() == ISD::BITCAST) {
15343 // Assume only bit cast to i32 will go away.
15344 if (Elt->getOperand(0).getValueType() == MVT::i32)
15345 ++NumOfBitCastedElts;
15346 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15347 // Constants are statically casted, thus do not count them as
15348 // relevant operands.
15349 --NumOfRelevantElts;
15350 }
15351
15352 // Check if more than half of the elements require a non-free bitcast.
15353 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15354 return SDValue();
15355
15356 SelectionDAG &DAG = DCI.DAG;
15357 // Create the new vector type.
15358 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15359 // Check if the type is legal.
15360 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15361 if (!TLI.isTypeLegal(VecVT))
15362 return SDValue();
15363
15364 // Combine:
15365 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15366 // => BITCAST INSERT_VECTOR_ELT
15367 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15368 // (BITCAST EN), N.
15369 SDValue Vec = DAG.getUNDEF(VecVT);
15370 SDLoc dl(N);
15371 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15372 SDValue V = N->getOperand(Idx);
15373 if (V.isUndef())
15374 continue;
15375 if (V.getOpcode() == ISD::BITCAST &&
15376 V->getOperand(0).getValueType() == MVT::i32)
15377 // Fold obvious case.
15378 V = V.getOperand(0);
15379 else {
15380 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15381 // Make the DAGCombiner fold the bitcasts.
15382 DCI.AddToWorklist(V.getNode());
15383 }
15384 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15385 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15386 }
15387 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15388 // Make the DAGCombiner fold the bitcasts.
15389 DCI.AddToWorklist(Vec.getNode());
15390 return Vec;
15391}
15392
15393static SDValue
15394 PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15395 EVT VT = N->getValueType(0);
15396 SDValue Op = N->getOperand(0);
15397 SDLoc dl(N);
15398
15399 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15400 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15401 // If the valuetypes are the same, we can remove the cast entirely.
15402 if (Op->getOperand(0).getValueType() == VT)
15403 return Op->getOperand(0);
15404 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15405 }
15406
15407 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15408 // more VPNOT which might get folded as else predicates.
15409 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15410 SDValue X =
15411 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15412 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15413 DCI.DAG.getConstant(65535, dl, MVT::i32));
15414 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15415 }
15416
15417 // Only the bottom 16 bits of the source register are used.
15418 if (Op.getValueType() == MVT::i32) {
15419 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15420 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15421 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15422 return SDValue(N, 0);
15423 }
15424 return SDValue();
15425}
15426
15427 static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
15428 const ARMSubtarget *ST) {
15429 EVT VT = N->getValueType(0);
15430 SDValue Op = N->getOperand(0);
15431 SDLoc dl(N);
15432
15433 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15434 if (ST->isLittle())
15435 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15436
15437 // VECTOR_REG_CAST undef -> undef
15438 if (Op.isUndef())
15439 return DAG.getUNDEF(VT);
15440
15441 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15442 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15443 // If the valuetypes are the same, we can remove the cast entirely.
15444 if (Op->getOperand(0).getValueType() == VT)
15445 return Op->getOperand(0);
15446 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15447 }
15448
15449 return SDValue();
15450}
15451
15452 static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
15453 const ARMSubtarget *Subtarget) {
15454 if (!Subtarget->hasMVEIntegerOps())
15455 return SDValue();
15456
15457 EVT VT = N->getValueType(0);
15458 SDValue Op0 = N->getOperand(0);
15459 SDValue Op1 = N->getOperand(1);
15460 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15461 SDLoc dl(N);
15462
15463 // vcmp X, 0, cc -> vcmpz X, cc
15464 if (isZeroVector(Op1))
15465 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15466
15467 unsigned SwappedCond = getSwappedCondition(Cond);
15468 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15469 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15470 if (isZeroVector(Op0))
15471 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15472 DAG.getConstant(SwappedCond, dl, MVT::i32));
15473 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15474 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15475 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15476 DAG.getConstant(SwappedCond, dl, MVT::i32));
15477 }
15478
15479 return SDValue();
15480}
15481
15482/// PerformInsertEltCombine - Target-specific dag combine xforms for
15483/// ISD::INSERT_VECTOR_ELT.
15484 static SDValue PerformInsertEltCombine(SDNode *N,
15485 TargetLowering::DAGCombinerInfo &DCI) {
15486 // Bitcast an i64 load inserted into a vector to f64.
15487 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15488 EVT VT = N->getValueType(0);
15489 SDNode *Elt = N->getOperand(1).getNode();
15490 if (VT.getVectorElementType() != MVT::i64 ||
15491 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15492 return SDValue();
15493
15494 SelectionDAG &DAG = DCI.DAG;
15495 SDLoc dl(N);
15496 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15497 VT.getVectorNumElements());
15498 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15499 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15500 // Make the DAGCombiner fold the bitcasts.
15501 DCI.AddToWorklist(Vec.getNode());
15502 DCI.AddToWorklist(V.getNode());
15503 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15504 Vec, V, N->getOperand(2));
15505 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15506}
15507
15508// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15509// directly or bitcast to an integer if the original is a float vector.
15510// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15511// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
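// For example (sketch): lanes 2 and 3 of a v4i32 x become the two results of
// (VMOVRRD (extract (vector_reg_cast v2f64 x), 1)).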
15512static SDValue
15513 PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15514 EVT VT = N->getValueType(0);
15515 SDLoc dl(N);
15516
15517 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15518 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15519 return SDValue();
15520
15521 SDValue Ext = SDValue(N, 0);
15522 if (Ext.getOpcode() == ISD::BITCAST &&
15523 Ext.getOperand(0).getValueType() == MVT::f32)
15524 Ext = Ext.getOperand(0);
15525 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15526 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15527 Ext.getConstantOperandVal(1) % 2 != 0)
15528 return SDValue();
15529 if (Ext->use_size() == 1 &&
15530 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15531 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15532 return SDValue();
15533
15534 SDValue Op0 = Ext.getOperand(0);
15535 EVT VecVT = Op0.getValueType();
15536 unsigned ResNo = Op0.getResNo();
15537 unsigned Lane = Ext.getConstantOperandVal(1);
15538 if (VecVT.getVectorNumElements() != 4)
15539 return SDValue();
15540
15541 // Find another extract, of Lane + 1
15542 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15543 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15544 isa<ConstantSDNode>(V->getOperand(1)) &&
15545 V->getConstantOperandVal(1) == Lane + 1 &&
15546 V->getOperand(0).getResNo() == ResNo;
15547 });
15548 if (OtherIt == Op0->uses().end())
15549 return SDValue();
15550
15551 // For float extracts, we need both vector lanes to be converted to an i32
15552 // value.
15553 SDValue OtherExt(*OtherIt, 0);
15554 if (OtherExt.getValueType() != MVT::i32) {
15555 if (OtherExt->use_size() != 1 ||
15556 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15557 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15558 return SDValue();
15559 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15560 }
15561
15562 // Convert the type to an f64 and extract with a VMOVRRD.
15563 SDValue F64 = DCI.DAG.getNode(
15564 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15565 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15566 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15567 SDValue VMOVRRD =
15568 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15569
15570 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15571 return VMOVRRD;
15572}
15573
15574 static SDValue PerformExtractEltCombine(SDNode *N,
15575 TargetLowering::DAGCombinerInfo &DCI,
15576 const ARMSubtarget *ST) {
15577 SDValue Op0 = N->getOperand(0);
15578 EVT VT = N->getValueType(0);
15579 SDLoc dl(N);
15580
15581 // extract (vdup x) -> x
15582 if (Op0->getOpcode() == ARMISD::VDUP) {
15583 SDValue X = Op0->getOperand(0);
15584 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15585 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15586 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15587 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15588 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15589 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15590
15591 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15592 X = X->getOperand(0);
15593 if (X.getValueType() == VT)
15594 return X;
15595 }
15596
15597 // extract ARM_BUILD_VECTOR -> x
15598 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15599 isa<ConstantSDNode>(N->getOperand(1)) &&
15600 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15601 return Op0.getOperand(N->getConstantOperandVal(1));
15602 }
15603
15604 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15605 if (Op0.getValueType() == MVT::v4i32 &&
15606 isa<ConstantSDNode>(N->getOperand(1)) &&
15607 Op0.getOpcode() == ISD::BITCAST &&
15608 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15609 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15610 SDValue BV = Op0.getOperand(0);
15611 unsigned Offset = N->getConstantOperandVal(1);
15612 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15613 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15614 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15615 }
15616
15617 // extract x, n; extract x, n+1 -> VMOVRRD x
15618 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15619 return R;
15620
15621 // extract (MVETrunc(x)) -> extract x
15622 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15623 unsigned Idx = N->getConstantOperandVal(1);
15624 unsigned Vec =
15625 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15626 unsigned SubIdx =
15627 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15628 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15629 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15630 }
15631
15632 return SDValue();
15633}
15634
15635 static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
15636 SDValue Op = N->getOperand(0);
15637 EVT VT = N->getValueType(0);
15638
15639 // sext_inreg(VGETLANEu) -> VGETLANEs
15640 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15641 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15642 Op.getOperand(0).getValueType().getScalarType())
15643 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15644 Op.getOperand(1));
15645
15646 return SDValue();
15647}
15648
15649static SDValue
15650 PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15651 SDValue Vec = N->getOperand(0);
15652 SDValue SubVec = N->getOperand(1);
15653 uint64_t IdxVal = N->getConstantOperandVal(2);
15654 EVT VecVT = Vec.getValueType();
15655 EVT SubVT = SubVec.getValueType();
15656
15657 // Only do this for legal fixed vector types.
15658 if (!VecVT.isFixedLengthVector() ||
15659 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15660 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15661 return SDValue();
15662
15663 // Ignore widening patterns.
15664 if (IdxVal == 0 && Vec.isUndef())
15665 return SDValue();
15666
15667 // Subvector must be half the width and an "aligned" insertion.
15668 unsigned NumSubElts = SubVT.getVectorNumElements();
15669 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15670 (IdxVal != 0 && IdxVal != NumSubElts))
15671 return SDValue();
15672
15673 // Fold insert_subvector -> concat_vectors
15674 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15675 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
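// e.g. insert_subvector(v8i16 V, v4i16 S, 4)
//        -> concat_vectors(extract_subvector(V, 0), S)   (illustration)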
15676 SDLoc DL(N);
15677 SDValue Lo, Hi;
15678 if (IdxVal == 0) {
15679 Lo = SubVec;
15680 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15681 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15682 } else {
15683 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15684 DCI.DAG.getVectorIdxConstant(0, DL));
15685 Hi = SubVec;
15686 }
15687 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15688}
15689
15690// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15691 static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15692 SelectionDAG &DAG) {
15693 SDValue Trunc = N->getOperand(0);
15694 EVT VT = Trunc.getValueType();
15695 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15696 return SDValue();
15697
15698 SDLoc DL(Trunc);
15699 if (isVMOVNTruncMask(N->getMask(), VT, false))
15700 return DAG.getNode(
15701 ARMISD::VMOVN, DL, VT,
15702 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15703 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15704 DAG.getConstant(1, DL, MVT::i32));
15705 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15706 return DAG.getNode(
15707 ARMISD::VMOVN, DL, VT,
15708 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15709 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15710 DAG.getConstant(1, DL, MVT::i32));
15711 return SDValue();
15712}
15713
15714/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15715/// ISD::VECTOR_SHUFFLE.
15716 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15717 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15718 return R;
15719
15720 // The LLVM shufflevector instruction does not require the shuffle mask
15721 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15722 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15723 // operands do not match the mask length, they are extended by concatenating
15724 // them with undef vectors. That is probably the right thing for other
15725 // targets, but for NEON it is better to concatenate two double-register
15726 // size vector operands into a single quad-register size vector. Do that
15727 // transformation here:
15728 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15729 // shuffle(concat(v1, v2), undef)
15730 SDValue Op0 = N->getOperand(0);
15731 SDValue Op1 = N->getOperand(1);
15732 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15733 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15734 Op0.getNumOperands() != 2 ||
15735 Op1.getNumOperands() != 2)
15736 return SDValue();
15737 SDValue Concat0Op1 = Op0.getOperand(1);
15738 SDValue Concat1Op1 = Op1.getOperand(1);
15739 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15740 return SDValue();
15741 // Skip the transformation if any of the types are illegal.
15742 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15743 EVT VT = N->getValueType(0);
15744 if (!TLI.isTypeLegal(VT) ||
15745 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15746 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15747 return SDValue();
15748
15749 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15750 Op0.getOperand(0), Op1.getOperand(0));
15751 // Translate the shuffle mask.
15752 SmallVector<int, 16> NewMask;
15753 unsigned NumElts = VT.getVectorNumElements();
15754 unsigned HalfElts = NumElts/2;
15755 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15756 for (unsigned n = 0; n < NumElts; ++n) {
15757 int MaskElt = SVN->getMaskElt(n);
15758 int NewElt = -1;
15759 if (MaskElt < (int)HalfElts)
15760 NewElt = MaskElt;
15761 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15762 NewElt = HalfElts + MaskElt - NumElts;
15763 NewMask.push_back(NewElt);
15764 }
15765 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15766 DAG.getUNDEF(VT), NewMask);
15767}
15768
15769/// Load/store instruction that can be merged with a base address
15770/// update
15771 struct BaseUpdateTarget {
15772 SDNode *N;
15773 bool isIntrinsic;
15774 bool isStore;
15775 unsigned AddrOpIdx;
15776};
15777
15778 struct BaseUpdateUser {
15779 /// Instruction that updates a pointer
15780 SDNode *N;
15781 /// Pointer increment operand
15782 SDValue Inc;
15783 /// Pointer increment value if it is a constant, or 0 otherwise
15784 unsigned ConstInc;
15785};
15786
15787 static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15788 struct BaseUpdateUser &User,
15789 bool SimpleConstIncOnly,
15790 TargetLowering::DAGCombinerInfo &DCI) {
15791 SelectionDAG &DAG = DCI.DAG;
15792 SDNode *N = Target.N;
15793 MemSDNode *MemN = cast<MemSDNode>(N);
15794 SDLoc dl(N);
15795
15796 // Find the new opcode for the updating load/store.
15797 bool isLoadOp = true;
15798 bool isLaneOp = false;
15799 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15800 // as an operand.
15801 bool hasAlignment = true;
15802 unsigned NewOpc = 0;
15803 unsigned NumVecs = 0;
15804 if (Target.isIntrinsic) {
15805 unsigned IntNo = N->getConstantOperandVal(1);
15806 switch (IntNo) {
15807 default:
15808 llvm_unreachable("unexpected intrinsic for Neon base update");
15809 case Intrinsic::arm_neon_vld1:
15810 NewOpc = ARMISD::VLD1_UPD;
15811 NumVecs = 1;
15812 break;
15813 case Intrinsic::arm_neon_vld2:
15814 NewOpc = ARMISD::VLD2_UPD;
15815 NumVecs = 2;
15816 break;
15817 case Intrinsic::arm_neon_vld3:
15818 NewOpc = ARMISD::VLD3_UPD;
15819 NumVecs = 3;
15820 break;
15821 case Intrinsic::arm_neon_vld4:
15822 NewOpc = ARMISD::VLD4_UPD;
15823 NumVecs = 4;
15824 break;
15825 case Intrinsic::arm_neon_vld1x2:
15826 NewOpc = ARMISD::VLD1x2_UPD;
15827 NumVecs = 2;
15828 hasAlignment = false;
15829 break;
15830 case Intrinsic::arm_neon_vld1x3:
15831 NewOpc = ARMISD::VLD1x3_UPD;
15832 NumVecs = 3;
15833 hasAlignment = false;
15834 break;
15835 case Intrinsic::arm_neon_vld1x4:
15836 NewOpc = ARMISD::VLD1x4_UPD;
15837 NumVecs = 4;
15838 hasAlignment = false;
15839 break;
15840 case Intrinsic::arm_neon_vld2dup:
15841 NewOpc = ARMISD::VLD2DUP_UPD;
15842 NumVecs = 2;
15843 break;
15844 case Intrinsic::arm_neon_vld3dup:
15845 NewOpc = ARMISD::VLD3DUP_UPD;
15846 NumVecs = 3;
15847 break;
15848 case Intrinsic::arm_neon_vld4dup:
15849 NewOpc = ARMISD::VLD4DUP_UPD;
15850 NumVecs = 4;
15851 break;
15852 case Intrinsic::arm_neon_vld2lane:
15853 NewOpc = ARMISD::VLD2LN_UPD;
15854 NumVecs = 2;
15855 isLaneOp = true;
15856 break;
15857 case Intrinsic::arm_neon_vld3lane:
15858 NewOpc = ARMISD::VLD3LN_UPD;
15859 NumVecs = 3;
15860 isLaneOp = true;
15861 break;
15862 case Intrinsic::arm_neon_vld4lane:
15863 NewOpc = ARMISD::VLD4LN_UPD;
15864 NumVecs = 4;
15865 isLaneOp = true;
15866 break;
15867 case Intrinsic::arm_neon_vst1:
15868 NewOpc = ARMISD::VST1_UPD;
15869 NumVecs = 1;
15870 isLoadOp = false;
15871 break;
15872 case Intrinsic::arm_neon_vst2:
15873 NewOpc = ARMISD::VST2_UPD;
15874 NumVecs = 2;
15875 isLoadOp = false;
15876 break;
15877 case Intrinsic::arm_neon_vst3:
15878 NewOpc = ARMISD::VST3_UPD;
15879 NumVecs = 3;
15880 isLoadOp = false;
15881 break;
15882 case Intrinsic::arm_neon_vst4:
15883 NewOpc = ARMISD::VST4_UPD;
15884 NumVecs = 4;
15885 isLoadOp = false;
15886 break;
15887 case Intrinsic::arm_neon_vst2lane:
15888 NewOpc = ARMISD::VST2LN_UPD;
15889 NumVecs = 2;
15890 isLoadOp = false;
15891 isLaneOp = true;
15892 break;
15893 case Intrinsic::arm_neon_vst3lane:
15894 NewOpc = ARMISD::VST3LN_UPD;
15895 NumVecs = 3;
15896 isLoadOp = false;
15897 isLaneOp = true;
15898 break;
15899 case Intrinsic::arm_neon_vst4lane:
15900 NewOpc = ARMISD::VST4LN_UPD;
15901 NumVecs = 4;
15902 isLoadOp = false;
15903 isLaneOp = true;
15904 break;
15905 case Intrinsic::arm_neon_vst1x2:
15906 NewOpc = ARMISD::VST1x2_UPD;
15907 NumVecs = 2;
15908 isLoadOp = false;
15909 hasAlignment = false;
15910 break;
15911 case Intrinsic::arm_neon_vst1x3:
15912 NewOpc = ARMISD::VST1x3_UPD;
15913 NumVecs = 3;
15914 isLoadOp = false;
15915 hasAlignment = false;
15916 break;
15917 case Intrinsic::arm_neon_vst1x4:
15918 NewOpc = ARMISD::VST1x4_UPD;
15919 NumVecs = 4;
15920 isLoadOp = false;
15921 hasAlignment = false;
15922 break;
15923 }
15924 } else {
15925 isLaneOp = true;
15926 switch (N->getOpcode()) {
15927 default:
15928 llvm_unreachable("unexpected opcode for Neon base update");
15929 case ARMISD::VLD1DUP:
15930 NewOpc = ARMISD::VLD1DUP_UPD;
15931 NumVecs = 1;
15932 break;
15933 case ARMISD::VLD2DUP:
15934 NewOpc = ARMISD::VLD2DUP_UPD;
15935 NumVecs = 2;
15936 break;
15937 case ARMISD::VLD3DUP:
15938 NewOpc = ARMISD::VLD3DUP_UPD;
15939 NumVecs = 3;
15940 break;
15941 case ARMISD::VLD4DUP:
15942 NewOpc = ARMISD::VLD4DUP_UPD;
15943 NumVecs = 4;
15944 break;
15945 case ISD::LOAD:
15946 NewOpc = ARMISD::VLD1_UPD;
15947 NumVecs = 1;
15948 isLaneOp = false;
15949 break;
15950 case ISD::STORE:
15951 NewOpc = ARMISD::VST1_UPD;
15952 NumVecs = 1;
15953 isLaneOp = false;
15954 isLoadOp = false;
15955 break;
15956 }
15957 }
15958
15959 // Find the size of memory referenced by the load/store.
15960 EVT VecTy;
15961 if (isLoadOp) {
15962 VecTy = N->getValueType(0);
15963 } else if (Target.isIntrinsic) {
15964 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15965 } else {
15966 assert(Target.isStore &&
15967 "Node has to be a load, a store, or an intrinsic!");
15968 VecTy = N->getOperand(1).getValueType();
15969 }
15970
15971 bool isVLDDUPOp =
15972 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15973 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15974
15975 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15976 if (isLaneOp || isVLDDUPOp)
15977 NumBytes /= VecTy.getVectorNumElements();
15978
15979 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15980 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15981 // separate instructions that make it harder to use a non-constant update.
15982 return false;
15983 }
15984
15985 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15986 return false;
15987
15988 // OK, we found an ADD we can fold into the base update.
15989 // Now, create a _UPD node, taking care of not breaking alignment.
15990
15991 EVT AlignedVecTy = VecTy;
15992 Align Alignment = MemN->getAlign();
15993
15994 // If this is a less-than-standard-aligned load/store, change the type to
15995 // match the standard alignment.
15996 // The alignment is overlooked when selecting _UPD variants; and it's
15997 // easier to introduce bitcasts here than fix that.
15998 // There are 3 ways to get to this base-update combine:
15999 // - intrinsics: they are assumed to be properly aligned (to the standard
16000 // alignment of the memory type), so we don't need to do anything.
16001 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16002 // intrinsics, so, likewise, there's nothing to do.
16003 // - generic load/store instructions: the alignment is specified as an
16004 // explicit operand, rather than implicitly as the standard alignment
16005 // of the memory type (like the intrinsics). We need to change the
16006 // memory type to match the explicit alignment. That way, we don't
16007 // generate non-standard-aligned ARMISD::VLDx nodes.
16008 if (isa<LSBaseSDNode>(N)) {
16009 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
16010 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
16011 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16012 assert(!isLaneOp && "Unexpected generic load/store lane.");
16013 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
16014 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
16015 }
16016 // Don't set an explicit alignment on regular load/stores that we want
16017 // to transform to VLD/VST 1_UPD nodes.
16018 // This matches the behavior of regular load/stores, which only get an
16019 // explicit alignment if the MMO alignment is larger than the standard
16020 // alignment of the memory type.
16021 // Intrinsics, however, always get an explicit alignment, set to the
16022 // alignment of the MMO.
16023 Alignment = Align(1);
16024 }
16025
16026 // Create the new updating load/store node.
16027 // First, create an SDVTList for the new updating node's results.
16028 EVT Tys[6];
16029 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16030 unsigned n;
16031 for (n = 0; n < NumResultVecs; ++n)
16032 Tys[n] = AlignedVecTy;
16033 Tys[n++] = MVT::i32;
16034 Tys[n] = MVT::Other;
16035 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16036
16037 // Then, gather the new node's operands.
16038 SmallVector<SDValue, 8> Ops;
16039 Ops.push_back(N->getOperand(0)); // incoming chain
16040 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16041 Ops.push_back(User.Inc);
16042
16043 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16044 // Try to match the intrinsic's signature
16045 Ops.push_back(StN->getValue());
16046 } else {
16047 // Loads (and of course intrinsics) match the intrinsics' signature,
16048 // so just add all but the alignment operand.
16049 unsigned LastOperand =
16050 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16051 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16052 Ops.push_back(N->getOperand(i));
16053 }
16054
16055 // For all node types, the alignment operand is always the last one.
16056 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16057
16058 // If this is a non-standard-aligned STORE, the penultimate operand is the
16059 // stored value. Bitcast it to the aligned type.
16060 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16061 SDValue &StVal = Ops[Ops.size() - 2];
16062 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16063 }
16064
16065 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16066 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16067 MemN->getMemOperand());
16068
16069 // Update the uses.
16070 SmallVector<SDValue, 5> NewResults;
16071 for (unsigned i = 0; i < NumResultVecs; ++i)
16072 NewResults.push_back(SDValue(UpdN.getNode(), i));
16073
16074 // If this is a non-standard-aligned LOAD, the first result is the loaded
16075 // value. Bitcast it to the expected result type.
16076 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16077 SDValue &LdVal = NewResults[0];
16078 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16079 }
16080
16081 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16082 DCI.CombineTo(N, NewResults);
16083 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16084
16085 return true;
16086}
16087
16088 // If (opcode ptr inc) is an ADD-like instruction, return the
16089 // increment value. Otherwise return 0.
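// For example, (add ptr, 16) yields 16; an (or ptr, 16) is treated the same
// way when ptr and 16 share no set bits, since the OR then acts as an ADD
// (illustrative note).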
16090static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16091 SDValue Inc, const SelectionDAG &DAG) {
16092 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16093 if (!CInc)
16094 return 0;
16095
16096 switch (Opcode) {
16097 case ARMISD::VLD1_UPD:
16098 case ISD::ADD:
16099 return CInc->getZExtValue();
16100 case ISD::OR: {
16101 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16102 // (OR ptr inc) is the same as (ADD ptr inc)
16103 return CInc->getZExtValue();
16104 }
16105 return 0;
16106 }
16107 default:
16108 return 0;
16109 }
16110}
16111
16112 static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16113 switch (N->getOpcode()) {
16114 case ISD::ADD:
16115 case ISD::OR: {
16116 if (isa<ConstantSDNode>(N->getOperand(1))) {
16117 *Ptr = N->getOperand(0);
16118 *CInc = N->getOperand(1);
16119 return true;
16120 }
16121 return false;
16122 }
16123 case ARMISD::VLD1_UPD: {
16124 if (isa<ConstantSDNode>(N->getOperand(2))) {
16125 *Ptr = N->getOperand(1);
16126 *CInc = N->getOperand(2);
16127 return true;
16128 }
16129 return false;
16130 }
16131 default:
16132 return false;
16133 }
16134}
16135
16136 static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16137 // Check that the add is independent of the load/store.
16138 // Otherwise, folding it would create a cycle. Search through Addr
16139 // as well, since the User may not be a direct user of Addr and
16140 // only share a base pointer.
16141 SmallPtrSet<const SDNode *, 32> Visited;
16142 SmallVector<const SDNode *, 16> Worklist;
16143 Worklist.push_back(N);
16144 Worklist.push_back(User);
16145 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16146 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16147 return false;
16148 return true;
16149}
16150
16151/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16152/// NEON load/store intrinsics, and generic vector load/stores, to merge
16153/// base address updates.
16154/// For generic load/stores, the memory type is assumed to be a vector.
16155/// The caller is assumed to have checked legality.
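/// For example (illustrative): a vld1 whose address is also consumed by an
/// (add addr, #N), where N matches the number of bytes accessed, is rewritten
/// into a single post-incremented VLD1_UPD that also returns the updated
/// pointer.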
16156 static SDValue CombineBaseUpdate(SDNode *N,
16157 TargetLowering::DAGCombinerInfo &DCI) {
16158 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16159 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16160 const bool isStore = N->getOpcode() == ISD::STORE;
16161 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16162 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16163
16164 SDValue Addr = N->getOperand(AddrOpIdx);
16166 SmallVector<BaseUpdateUser, 8> BaseUpdates;
16165
16167
16168 // Search for a use of the address operand that is an increment.
16169 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16170 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16171 SDNode *User = *UI;
16172 if (UI.getUse().getResNo() != Addr.getResNo() ||
16173 User->getNumOperands() != 2)
16174 continue;
16175
16176 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16177 unsigned ConstInc =
16178 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16179
16180 if (ConstInc || User->getOpcode() == ISD::ADD)
16181 BaseUpdates.push_back({User, Inc, ConstInc});
16182 }
16183
16184 // If the address is a constant pointer increment itself, find
16185 // another constant increment that has the same base operand
16186 SDValue Base;
16187 SDValue CInc;
16188 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16189 unsigned Offset =
16190 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16191 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16192 UI != UE; ++UI) {
16193
16194 SDNode *User = *UI;
16195 if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
16196 User->getNumOperands() != 2)
16197 continue;
16198
16199 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16200 unsigned UserOffset =
16201 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16202
16203 if (!UserOffset || UserOffset <= Offset)
16204 continue;
16205
16206 unsigned NewConstInc = UserOffset - Offset;
16207 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16208 BaseUpdates.push_back({User, NewInc, NewConstInc});
16209 }
16210 }
16211
16212 // Try to fold the load/store with an update that matches memory
16213 // access size. This should work well for sequential loads.
16214 //
16215 // Filter out invalid updates as well.
16216 unsigned NumValidUpd = BaseUpdates.size();
16217 for (unsigned I = 0; I < NumValidUpd;) {
16218 BaseUpdateUser &User = BaseUpdates[I];
16219 if (!isValidBaseUpdate(N, User.N)) {
16220 --NumValidUpd;
16221 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16222 continue;
16223 }
16224
16225 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16226 return SDValue();
16227 ++I;
16228 }
16229 BaseUpdates.resize(NumValidUpd);
16230
16231 // Try to fold with other users. Non-constant updates are considered
16232 // first, and constant updates are sorted to not break a sequence of
16233 // strided accesses (if there is any).
16234 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16235 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16236 return LHS.ConstInc < RHS.ConstInc;
16237 });
16238 for (BaseUpdateUser &User : BaseUpdates) {
16239 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16240 return SDValue();
16241 }
16242 return SDValue();
16243}
16244
16245 static SDValue PerformVLDCombine(SDNode *N,
16246 TargetLowering::DAGCombinerInfo &DCI) {
16247 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16248 return SDValue();
16249
16250 return CombineBaseUpdate(N, DCI);
16251}
16252
16253 static SDValue PerformMVEVLDCombine(SDNode *N,
16254 TargetLowering::DAGCombinerInfo &DCI) {
16255 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16256 return SDValue();
16257
16258 SelectionDAG &DAG = DCI.DAG;
16259 SDValue Addr = N->getOperand(2);
16260 MemSDNode *MemN = cast<MemSDNode>(N);
16261 SDLoc dl(N);
16262
16263 // For the stores, where there are multiple intrinsics we only actually want
16264 // to post-inc the last of them.
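// e.g. an arm_mve_vst4q is expanded into four stage intrinsics (stages 0-3);
// only the stage-3 store is given the post-incremented form, so the pointer
// is updated exactly once (sketch).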
16265 unsigned IntNo = N->getConstantOperandVal(1);
16266 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16267 return SDValue();
16268 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16269 return SDValue();
16270
16271 // Search for a use of the address operand that is an increment.
16272 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16273 UE = Addr.getNode()->use_end();
16274 UI != UE; ++UI) {
16275 SDNode *User = *UI;
16276 if (User->getOpcode() != ISD::ADD ||
16277 UI.getUse().getResNo() != Addr.getResNo())
16278 continue;
16279
16280 // Check that the add is independent of the load/store. Otherwise, folding
16281 // it would create a cycle. We can avoid searching through Addr as it's a
16282 // predecessor to both.
16283 SmallPtrSet<const SDNode *, 32> Visited;
16284 SmallVector<const SDNode *, 16> Worklist;
16285 Visited.insert(Addr.getNode());
16286 Worklist.push_back(N);
16287 Worklist.push_back(User);
16288 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16289 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16290 continue;
16291
16292 // Find the new opcode for the updating load/store.
16293 bool isLoadOp = true;
16294 unsigned NewOpc = 0;
16295 unsigned NumVecs = 0;
16296 switch (IntNo) {
16297 default:
16298 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16299 case Intrinsic::arm_mve_vld2q:
16300 NewOpc = ARMISD::VLD2_UPD;
16301 NumVecs = 2;
16302 break;
16303 case Intrinsic::arm_mve_vld4q:
16304 NewOpc = ARMISD::VLD4_UPD;
16305 NumVecs = 4;
16306 break;
16307 case Intrinsic::arm_mve_vst2q:
16308 NewOpc = ARMISD::VST2_UPD;
16309 NumVecs = 2;
16310 isLoadOp = false;
16311 break;
16312 case Intrinsic::arm_mve_vst4q:
16313 NewOpc = ARMISD::VST4_UPD;
16314 NumVecs = 4;
16315 isLoadOp = false;
16316 break;
16317 }
16318
16319 // Find the size of memory referenced by the load/store.
16320 EVT VecTy;
16321 if (isLoadOp) {
16322 VecTy = N->getValueType(0);
16323 } else {
16324 VecTy = N->getOperand(3).getValueType();
16325 }
16326
16327 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16328
16329 // If the increment is a constant, it must match the memory ref size.
16330 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16331 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16332 if (!CInc || CInc->getZExtValue() != NumBytes)
16333 continue;
16334
16335 // Create the new updating load/store node.
16336 // First, create an SDVTList for the new updating node's results.
16337 EVT Tys[6];
16338 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16339 unsigned n;
16340 for (n = 0; n < NumResultVecs; ++n)
16341 Tys[n] = VecTy;
16342 Tys[n++] = MVT::i32;
16343 Tys[n] = MVT::Other;
16344 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16345
16346 // Then, gather the new node's operands.
16347 SmallVector<SDValue, 8> Ops;
16348 Ops.push_back(N->getOperand(0)); // incoming chain
16349 Ops.push_back(N->getOperand(2)); // ptr
16350 Ops.push_back(Inc);
16351
16352 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16353 Ops.push_back(N->getOperand(i));
16354
16355 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16356 MemN->getMemOperand());
16357
16358 // Update the uses.
16359 SmallVector<SDValue, 5> NewResults;
16360 for (unsigned i = 0; i < NumResultVecs; ++i)
16361 NewResults.push_back(SDValue(UpdN.getNode(), i));
16362
16363 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16364 DCI.CombineTo(N, NewResults);
16365 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16366
16367 break;
16368 }
16369
16370 return SDValue();
16371}
16372
16373/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16374/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16375/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16376/// return true.
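/// As a rough example (register choices are illustrative only), the sequence
///   vld2.32 {d16[0], d17[0]}, [r0]
///   vdup.32 d18, d16[0]
///   vdup.32 d19, d17[0]
/// can become a single all-lanes load:
///   vld2.32 {d18[], d19[]}, [r0]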
16377 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16378 SelectionDAG &DAG = DCI.DAG;
16379 EVT VT = N->getValueType(0);
16380 // vldN-dup instructions only support 64-bit vectors for N > 1.
16381 if (!VT.is64BitVector())
16382 return false;
16383
16384 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16385 SDNode *VLD = N->getOperand(0).getNode();
16386 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16387 return false;
16388 unsigned NumVecs = 0;
16389 unsigned NewOpc = 0;
16390 unsigned IntNo = VLD->getConstantOperandVal(1);
16391 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16392 NumVecs = 2;
16393 NewOpc = ARMISD::VLD2DUP;
16394 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16395 NumVecs = 3;
16396 NewOpc = ARMISD::VLD3DUP;
16397 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16398 NumVecs = 4;
16399 NewOpc = ARMISD::VLD4DUP;
16400 } else {
16401 return false;
16402 }
16403
16404 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16405 // numbers match the load.
16406 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16407 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16408 UI != UE; ++UI) {
16409 // Ignore uses of the chain result.
16410 if (UI.getUse().getResNo() == NumVecs)
16411 continue;
16412 SDNode *User = *UI;
16413 if (User->getOpcode() != ARMISD::VDUPLANE ||
16414 VLDLaneNo != User->getConstantOperandVal(1))
16415 return false;
16416 }
16417
16418 // Create the vldN-dup node.
16419 EVT Tys[5];
16420 unsigned n;
16421 for (n = 0; n < NumVecs; ++n)
16422 Tys[n] = VT;
16423 Tys[n] = MVT::Other;
16424 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16425 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16426 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16427 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16428 Ops, VLDMemInt->getMemoryVT(),
16429 VLDMemInt->getMemOperand());
16430
16431 // Update the uses.
16432 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16433 UI != UE; ++UI) {
16434 unsigned ResNo = UI.getUse().getResNo();
16435 // Ignore uses of the chain result.
16436 if (ResNo == NumVecs)
16437 continue;
16438 SDNode *User = *UI;
16439 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16440 }
16441
16442 // Now the vldN-lane intrinsic is dead except for its chain result.
16443 // Update uses of the chain.
16444 std::vector<SDValue> VLDDupResults;
16445 for (unsigned n = 0; n < NumVecs; ++n)
16446 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16447 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16448 DCI.CombineTo(VLD, VLDDupResults);
16449
16450 return true;
16451}
16452
16453/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16454/// ARMISD::VDUPLANE.
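/// Under MVE the rewrite is roughly (a sketch in DAG notation):
///   VDUPLANE(v, lane) --> VDUP(EXTRACT_VECTOR_ELT(v, lane))
/// since MVE only has a VDUP from a GPR, not a lane-to-all-lanes duplicate.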
16455 static SDValue PerformVDUPLANECombine(SDNode *N,
16456 TargetLowering::DAGCombinerInfo &DCI,
16457 const ARMSubtarget *Subtarget) {
16458 SDValue Op = N->getOperand(0);
16459 EVT VT = N->getValueType(0);
16460
16461 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16462 if (Subtarget->hasMVEIntegerOps()) {
16463 EVT ExtractVT = VT.getVectorElementType();
16464 // We need to ensure we are creating a legal type.
16465 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16466 ExtractVT = MVT::i32;
16467 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16468 N->getOperand(0), N->getOperand(1));
16469 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16470 }
16471
16472 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16473 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16474 if (CombineVLDDUP(N, DCI))
16475 return SDValue(N, 0);
16476
16477 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16478 // redundant. Ignore bit_converts for now; element sizes are checked below.
16479 while (Op.getOpcode() == ISD::BITCAST)
16480 Op = Op.getOperand(0);
16481 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16482 return SDValue();
16483
16484 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16485 unsigned EltSize = Op.getScalarValueSizeInBits();
16486 // The canonical VMOV for a zero vector uses a 32-bit element size.
16487 unsigned Imm = Op.getConstantOperandVal(0);
16488 unsigned EltBits;
16489 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16490 EltSize = 8;
16491 if (EltSize > VT.getScalarSizeInBits())
16492 return SDValue();
16493
16494 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16495}
16496
16497/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
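/// For example (illustrative), on NEON a dup fed by a scalar load such as
///   ldr     r1, [r0]
///   vdup.32 q8, r1
/// can instead be matched to an all-lanes load:
///   vld1.32 {d16[], d17[]}, [r0]
/// (see the VDUP(LOAD) -> VLD1DUP match below).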
16498 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16499 const ARMSubtarget *Subtarget) {
16500 SDValue Op = N->getOperand(0);
16501 SDLoc dl(N);
16502
16503 if (Subtarget->hasMVEIntegerOps()) {
16504 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16505 // need to come from a GPR.
16506 if (Op.getValueType() == MVT::f32)
16507 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16508 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16509 else if (Op.getValueType() == MVT::f16)
16510 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16511 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16512 }
16513
16514 if (!Subtarget->hasNEON())
16515 return SDValue();
16516
16517 // Match VDUP(LOAD) -> VLD1DUP.
16518 // We match this pattern here rather than waiting for isel because the
16519 // transform is only legal for unindexed loads.
16520 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16521 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16522 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16523 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16524 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16525 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16526 SDValue VLDDup =
16527 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16528 LD->getMemoryVT(), LD->getMemOperand());
16529 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16530 return VLDDup;
16531 }
16532
16533 return SDValue();
16534}
16535
16538 const ARMSubtarget *Subtarget) {
16539 EVT VT = N->getValueType(0);
16540
16541 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16542 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16543 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16544 return CombineBaseUpdate(N, DCI);
16545
16546 return SDValue();
16547}
16548
16549// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16550// pack all of the elements in one place. Next, store to memory in fewer
16551// chunks.
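// A hypothetical example: a truncating store of v4i32 to v4i8 is bitcast to
// v16i8, shuffled so the four live bytes land in the low lanes, and then
// stored as a single i32 chunk instead of four separate byte-sized pieces.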
16552 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16553 SelectionDAG &DAG) {
16554 SDValue StVal = St->getValue();
16555 EVT VT = StVal.getValueType();
16556 if (!St->isTruncatingStore() || !VT.isVector())
16557 return SDValue();
16558 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16559 EVT StVT = St->getMemoryVT();
16560 unsigned NumElems = VT.getVectorNumElements();
16561 assert(StVT != VT && "Cannot truncate to the same type");
16562 unsigned FromEltSz = VT.getScalarSizeInBits();
16563 unsigned ToEltSz = StVT.getScalarSizeInBits();
16564
16565 // The From and To element sizes and the element count must be powers of two.
16566 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16567 return SDValue();
16568
16569 // We are going to use the original vector elt for storing.
16570 // Accumulated smaller vector elements must be a multiple of the store size.
16571 if (0 != (NumElems * FromEltSz) % ToEltSz)
16572 return SDValue();
16573
16574 unsigned SizeRatio = FromEltSz / ToEltSz;
16575 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16576
16577 // Create a type on which we perform the shuffle.
16578 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16579 NumElems * SizeRatio);
16580 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16581
16582 SDLoc DL(St);
16583 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16584 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16585 for (unsigned i = 0; i < NumElems; ++i)
16586 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16587 : i * SizeRatio;
16588
16589 // Can't shuffle using an illegal type.
16590 if (!TLI.isTypeLegal(WideVecVT))
16591 return SDValue();
16592
16593 SDValue Shuff = DAG.getVectorShuffle(
16594 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16595 // At this point all of the data is stored at the bottom of the
16596 // register. We now need to save it to mem.
16597
16598 // Find the largest store unit
16599 MVT StoreType = MVT::i8;
16600 for (MVT Tp : MVT::integer_valuetypes()) {
16601 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16602 StoreType = Tp;
16603 }
16604 // Didn't find a legal store type.
16605 if (!TLI.isTypeLegal(StoreType))
16606 return SDValue();
16607
16608 // Bitcast the original vector into a vector of store-size units
16609 EVT StoreVecVT =
16610 EVT::getVectorVT(*DAG.getContext(), StoreType,
16611 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16612 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16613 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16614 SmallVector<SDValue, 8> Chains;
16615 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16616 TLI.getPointerTy(DAG.getDataLayout()));
16617 SDValue BasePtr = St->getBasePtr();
16618
16619 // Perform one or more big stores into memory.
16620 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16621 for (unsigned I = 0; I < E; I++) {
16622 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16623 ShuffWide, DAG.getIntPtrConstant(I, DL));
16624 SDValue Ch =
16625 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16626 St->getAlign(), St->getMemOperand()->getFlags());
16627 BasePtr =
16628 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16629 Chains.push_back(Ch);
16630 }
16631 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16632}
16633
16634// Try taking a single vector store from an fpround (which would otherwise turn
16635// into an expensive buildvector) and splitting it into a series of narrowing
16636// stores.
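// Sketch of the intent (the types shown are the common case, not the only
// one): a store of (fpround v8f32 to v8f16) is split into two v4f32 slices,
// each narrowed with ARMISD::VCVTN and emitted as a v4i16 truncating store,
// instead of building the v8f16 value lane by lane.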
16637 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16638 SelectionDAG &DAG) {
16639 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16640 return SDValue();
16641 SDValue Trunc = St->getValue();
16642 if (Trunc->getOpcode() != ISD::FP_ROUND)
16643 return SDValue();
16644 EVT FromVT = Trunc->getOperand(0).getValueType();
16645 EVT ToVT = Trunc.getValueType();
16646 if (!ToVT.isVector())
16647 return SDValue();
16649 EVT ToEltVT = ToVT.getVectorElementType();
16650 EVT FromEltVT = FromVT.getVectorElementType();
16651
16652 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16653 return SDValue();
16654
16655 unsigned NumElements = 4;
16656 if (FromVT.getVectorNumElements() % NumElements != 0)
16657 return SDValue();
16658
16659 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16660 // use the VMOVN over splitting the store. We are looking for patterns of:
16661 // !rev: 0 N 1 N+1 2 N+2 ...
16662 // rev: N 0 N+1 1 N+2 2 ...
16663 // The shuffle may either be a single source (in which case N = NumElts/2) or
16664 // two inputs extended with concat to the same size (in which case N =
16665 // NumElts).
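// For instance, with a v8i16 result built from two v4 inputs (N = 4), the
// !rev form corresponds to the mask <0, 4, 1, 5, 2, 6, 3, 7> and the rev
// form to <4, 0, 5, 1, 6, 2, 7, 3>; undef lanes are also accepted.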
16666 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16667 ArrayRef<int> M = SVN->getMask();
16668 unsigned NumElts = ToVT.getVectorNumElements();
16669 if (SVN->getOperand(1).isUndef())
16670 NumElts /= 2;
16671
16672 unsigned Off0 = Rev ? NumElts : 0;
16673 unsigned Off1 = Rev ? 0 : NumElts;
16674
16675 for (unsigned I = 0; I < NumElts; I += 2) {
16676 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16677 return false;
16678 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16679 return false;
16680 }
16681
16682 return true;
16683 };
16684
16685 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16686 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16687 return SDValue();
16688
16689 LLVMContext &C = *DAG.getContext();
16690 SDLoc DL(St);
16691 // Details about the old store
16692 SDValue Ch = St->getChain();
16693 SDValue BasePtr = St->getBasePtr();
16694 Align Alignment = St->getOriginalAlign();
16695 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16696 AAMDNodes AAInfo = St->getAAInfo();
16697
16698 // We split the store into slices of NumElements. fp16 trunc stores are
16699 // converted with a vcvt and then stored as truncating integer stores.
16700 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16701 EVT NewToVT = EVT::getVectorVT(
16702 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16703
16704 SmallVector<SDValue, 4> Stores;
16705 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16706 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16707 SDValue NewPtr =
16708 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16709
16710 SDValue Extract =
16711 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16712 DAG.getConstant(i * NumElements, DL, MVT::i32));
16713
16714 SDValue FPTrunc =
16715 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16716 Extract, DAG.getConstant(0, DL, MVT::i32));
16717 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16718
16719 SDValue Store = DAG.getTruncStore(
16720 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16721 NewToVT, Alignment, MMOFlags, AAInfo);
16722 Stores.push_back(Store);
16723 }
16724 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16725}
16726
16727// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16728// into an expensive buildvector) and splitting it into a series of narrowing
16729// stores.
16730 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16731 SelectionDAG &DAG) {
16732 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16733 return SDValue();
16734 SDValue Trunc = St->getValue();
16735 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16736 return SDValue();
16737 EVT FromVT = Trunc->getOperand(0).getValueType();
16738 EVT ToVT = Trunc.getValueType();
16739
16740 LLVMContext &C = *DAG.getContext();
16741 SDLoc DL(St);
16742 // Details about the old store
16743 SDValue Ch = St->getChain();
16744 SDValue BasePtr = St->getBasePtr();
16745 Align Alignment = St->getOriginalAlign();
16746 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16747 AAMDNodes AAInfo = St->getAAInfo();
16748
16749 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16750 FromVT.getVectorNumElements());
16751
16752 SmallVector<SDValue, 4> Stores;
16753 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16754 unsigned NewOffset =
16755 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16756 SDValue NewPtr =
16757 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16758
16759 SDValue Extract = Trunc.getOperand(i);
16760 SDValue Store = DAG.getTruncStore(
16761 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16762 NewToVT, Alignment, MMOFlags, AAInfo);
16763 Stores.push_back(Store);
16764 }
16765 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16766}
16767
16768// Given a floating point store from an extracted vector, with an integer
16769// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16770 // help reduce fp register pressure, avoids the fp extract, and allows the use
16771 // of integer post-inc stores that are not available with vstr.
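// Illustrative sketch: if the DAG already contains an i32 VGETLANEu of the
// same vector and lane, a store of the f16 extract of that lane can be
// emitted as, e.g.,
//   vmov.u16 r1, q0[1]
//   strh     r1, [r0]
// rather than extracting into an S register and storing with vstr.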
16772 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16773 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16774 return SDValue();
16775 SDValue Extract = St->getValue();
16776 EVT VT = Extract.getValueType();
16777 // For now only uses f16. This may be useful for f32 too, but that will
16778 // be bitcast(extract), not the VGETLANEu we currently check here.
16779 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16780 return SDValue();
16781
16782 SDNode *GetLane =
16783 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16784 {Extract.getOperand(0), Extract.getOperand(1)});
16785 if (!GetLane)
16786 return SDValue();
16787
16788 LLVMContext &C = *DAG.getContext();
16789 SDLoc DL(St);
16790 // Create a new integer store to replace the existing floating point version.
16791 SDValue Ch = St->getChain();
16792 SDValue BasePtr = St->getBasePtr();
16793 Align Alignment = St->getOriginalAlign();
16794 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16795 AAMDNodes AAInfo = St->getAAInfo();
16796 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16797 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16798 St->getPointerInfo(), NewToVT, Alignment,
16799 MMOFlags, AAInfo);
16800
16801 return Store;
16802}
16803
16804/// PerformSTORECombine - Target-specific dag combine xforms for
16805/// ISD::STORE.
16806 static SDValue PerformSTORECombine(SDNode *N,
16807 TargetLowering::DAGCombinerInfo &DCI,
16808 const ARMSubtarget *Subtarget) {
16809 StoreSDNode *St = cast<StoreSDNode>(N);
16810 if (St->isVolatile())
16811 return SDValue();
16812 SDValue StVal = St->getValue();
16813 EVT VT = StVal.getValueType();
16814
16815 if (Subtarget->hasNEON())
16816 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16817 return Store;
16818
16819 if (Subtarget->hasMVEFloatOps())
16820 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16821 return NewToken;
16822
16823 if (Subtarget->hasMVEIntegerOps()) {
16824 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16825 return NewChain;
16826 if (SDValue NewToken =
16827 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16828 return NewToken;
16829 }
16830
16831 if (!ISD::isNormalStore(St))
16832 return SDValue();
16833
16834 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16835 // ARM stores of arguments in the same cache line.
16836 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16837 StVal.getNode()->hasOneUse()) {
16838 SelectionDAG &DAG = DCI.DAG;
16839 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16840 SDLoc DL(St);
16841 SDValue BasePtr = St->getBasePtr();
16842 SDValue NewST1 = DAG.getStore(
16843 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16844 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16845 St->getMemOperand()->getFlags());
16846
16847 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16848 DAG.getConstant(4, DL, MVT::i32));
16849 return DAG.getStore(NewST1.getValue(0), DL,
16850 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16851 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16852 St->getOriginalAlign(),
16853 St->getMemOperand()->getFlags());
16854 }
16855
16856 if (StVal.getValueType() == MVT::i64 &&
16857 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16858
16859 // Bitcast an i64 store extracted from a vector to f64.
16860 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16861 SelectionDAG &DAG = DCI.DAG;
16862 SDLoc dl(StVal);
16863 SDValue IntVec = StVal.getOperand(0);
16864 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16865 IntVec.getValueType().getVectorNumElements());
16866 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16867 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16868 Vec, StVal.getOperand(1));
16869 dl = SDLoc(N);
16870 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16871 // Make the DAGCombiner fold the bitcasts.
16872 DCI.AddToWorklist(Vec.getNode());
16873 DCI.AddToWorklist(ExtElt.getNode());
16874 DCI.AddToWorklist(V.getNode());
16875 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16876 St->getPointerInfo(), St->getAlign(),
16877 St->getMemOperand()->getFlags(), St->getAAInfo());
16878 }
16879
16880 // If this is a legal vector store, try to combine it into a VST1_UPD.
16881 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16882 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16883 return CombineBaseUpdate(N, DCI);
16884
16885 return SDValue();
16886}
16887
16888/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16889/// can replace combinations of VMUL and VCVT (floating-point to integer)
16890/// when the VMUL has a constant operand that is a power of 2.
16891///
16892/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16893/// vmul.f32 d16, d17, d16
16894/// vcvt.s32.f32 d16, d16
16895/// becomes:
16896/// vcvt.s32.f32 d16, d16, #3
16897 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16898 const ARMSubtarget *Subtarget) {
16899 if (!Subtarget->hasNEON())
16900 return SDValue();
16901
16902 SDValue Op = N->getOperand(0);
16903 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16904 Op.getOpcode() != ISD::FMUL)
16905 return SDValue();
16906
16907 SDValue ConstVec = Op->getOperand(1);
16908 if (!isa<BuildVectorSDNode>(ConstVec))
16909 return SDValue();
16910
16911 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16912 uint32_t FloatBits = FloatTy.getSizeInBits();
16913 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16914 uint32_t IntBits = IntTy.getSizeInBits();
16915 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16916 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16917 // These instructions only exist converting from f32 to i32. We can handle
16918 // smaller integers by generating an extra truncate, but larger ones would
16919 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16920 // these instructions only support v2i32/v4i32 types.
16921 return SDValue();
16922 }
16923
16924 BitVector UndefElements;
16925 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16926 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16927 if (C == -1 || C == 0 || C > 32)
16928 return SDValue();
16929
16930 SDLoc dl(N);
16931 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16932 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16933 Intrinsic::arm_neon_vcvtfp2fxu;
16934 SDValue FixConv = DAG.getNode(
16935 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16936 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16937 DAG.getConstant(C, dl, MVT::i32));
16938
16939 if (IntBits < FloatBits)
16940 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16941
16942 return FixConv;
16943}
16944
16945 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16946 const ARMSubtarget *Subtarget) {
16947 if (!Subtarget->hasMVEFloatOps())
16948 return SDValue();
16949
16950 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16951 // The second form can be more easily turned into a predicated vadd, and
16952 // possibly combined into a fma to become a predicated vfma.
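// A small worked case (hypothetical): with c a lane predicate,
//   fadd x, (vselect c, y, -0.0)
// computes x + y in active lanes and x + (-0.0) == x in inactive lanes, so
// it is equivalent to (vselect c, (fadd x, y), x), which maps naturally onto
// a predicated vadd.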
16953 SDValue Op0 = N->getOperand(0);
16954 SDValue Op1 = N->getOperand(1);
16955 EVT VT = N->getValueType(0);
16956 SDLoc DL(N);
16957
16958 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
16959 // which these VMOV's represent.
16960 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16961 if (Op.getOpcode() != ISD::BITCAST ||
16962 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16963 return false;
16964 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16965 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16966 return true;
16967 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16968 return true;
16969 return false;
16970 };
16971
16972 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16973 std::swap(Op0, Op1);
16974
16975 if (Op1.getOpcode() != ISD::VSELECT)
16976 return SDValue();
16977
16978 SDNodeFlags FaddFlags = N->getFlags();
16979 bool NSZ = FaddFlags.hasNoSignedZeros();
16980 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16981 return SDValue();
16982
16983 SDValue FAdd =
16984 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16985 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16986}
16987
16988 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
16989 SDValue LHS = N->getOperand(0);
16990 SDValue RHS = N->getOperand(1);
16991 EVT VT = N->getValueType(0);
16992 SDLoc DL(N);
16993
16994 if (!N->getFlags().hasAllowReassociation())
16995 return SDValue();
16996
16997 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
16998 auto ReassocComplex = [&](SDValue A, SDValue B) {
16999 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
17000 return SDValue();
17001 unsigned Opc = A.getConstantOperandVal(0);
17002 if (Opc != Intrinsic::arm_mve_vcmlaq)
17003 return SDValue();
17004 SDValue VCMLA = DAG.getNode(
17005 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
17006 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17007 A.getOperand(3), A.getOperand(4));
17008 VCMLA->setFlags(A->getFlags());
17009 return VCMLA;
17010 };
17011 if (SDValue R = ReassocComplex(LHS, RHS))
17012 return R;
17013 if (SDValue R = ReassocComplex(RHS, LHS))
17014 return R;
17015
17016 return SDValue();
17017}
17018
17020 const ARMSubtarget *Subtarget) {
17021 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17022 return S;
17023 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17024 return S;
17025 return SDValue();
17026}
17027
17028/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17029/// can replace combinations of VCVT (integer to floating-point) and VMUL
17030/// when the VMUL has a constant operand that is a power of 2.
17031///
17032/// Example (assume d17 = <float 0.125, float 0.125>):
17033/// vcvt.f32.s32 d16, d16
17034/// vmul.f32 d16, d16, d17
17035/// becomes:
17036/// vcvt.f32.s32 d16, d16, #3
17038 const ARMSubtarget *Subtarget) {
17039 if (!Subtarget->hasNEON())
17040 return SDValue();
17041
17042 SDValue Op = N->getOperand(0);
17043 unsigned OpOpcode = Op.getNode()->getOpcode();
17044 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17045 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17046 return SDValue();
17047
17048 SDValue ConstVec = N->getOperand(1);
17049 if (!isa<BuildVectorSDNode>(ConstVec))
17050 return SDValue();
17051
17052 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17053 uint32_t FloatBits = FloatTy.getSizeInBits();
17054 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17055 uint32_t IntBits = IntTy.getSizeInBits();
17056 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17057 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17058 // These instructions only exist converting from i32 to f32. We can handle
17059 // smaller integers by generating an extra extend, but larger ones would
17060 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17061 // these instructions only support v2i32/v4i32 types.
17062 return SDValue();
17063 }
17064
17065 ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);
17066 APFloat Recip(0.0f);
17067 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17068 return SDValue();
17069
17070 bool IsExact;
17071 APSInt IntVal(33);
17072 if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
17073 APFloat::opOK ||
17074 !IsExact)
17075 return SDValue();
17076
17077 int32_t C = IntVal.exactLogBase2();
17078 if (C == -1 || C == 0 || C > 32)
17079 return SDValue();
17080
17081 SDLoc DL(N);
17082 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17083 SDValue ConvInput = Op.getOperand(0);
17084 if (IntBits < FloatBits)
17085 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
17086 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
17087
17088 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
17089 : Intrinsic::arm_neon_vcvtfxu2fp;
17090 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
17091 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
17092 DAG.getConstant(C, DL, MVT::i32));
17093}
17094
17096 const ARMSubtarget *ST) {
17097 if (!ST->hasMVEIntegerOps())
17098 return SDValue();
17099
17100 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17101 EVT ResVT = N->getValueType(0);
17102 SDValue N0 = N->getOperand(0);
17103 SDLoc dl(N);
17104
17105 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17106 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17107 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17108 N0.getValueType() == MVT::v16i8)) {
17109 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17110 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17111 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17112 }
17113
17114 // We are looking for something that will have illegal types if left alone,
17115 // but that we can convert to a single instruction under MVE. For example
17116 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17117 // or
17118 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17119
17120 // The legal cases are:
17121 // VADDV u/s 8/16/32
17122 // VMLAV u/s 8/16/32
17123 // VADDLV u/s 32
17124 // VMLALV u/s 16/32
17125
17126 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17127 // extend it and use v4i32 instead.
17128 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17129 EVT AVT = A.getValueType();
17130 return any_of(ExtTypes, [&](MVT Ty) {
17131 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17132 AVT.bitsLE(Ty);
17133 });
17134 };
17135 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17136 EVT AVT = A.getValueType();
17137 if (!AVT.is128BitVector())
17138 A = DAG.getNode(ExtendCode, dl,
17140 128 / AVT.getVectorMinNumElements())),
17141 A);
17142 return A;
17143 };
17144 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17145 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17146 return SDValue();
17147 SDValue A = N0->getOperand(0);
17148 if (ExtTypeMatches(A, ExtTypes))
17149 return ExtendIfNeeded(A, ExtendCode);
17150 return SDValue();
17151 };
17152 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17153 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17154 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17155 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17156 return SDValue();
17157 Mask = N0->getOperand(0);
17158 SDValue Ext = N0->getOperand(1);
17159 if (Ext->getOpcode() != ExtendCode)
17160 return SDValue();
17161 SDValue A = Ext->getOperand(0);
17162 if (ExtTypeMatches(A, ExtTypes))
17163 return ExtendIfNeeded(A, ExtendCode);
17164 return SDValue();
17165 };
17166 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17167 SDValue &A, SDValue &B) {
17168 // For a vmla we are trying to match a larger pattern:
17169 // ExtA = sext/zext A
17170 // ExtB = sext/zext B
17171 // Mul = mul ExtA, ExtB
17172 // vecreduce.add Mul
17173 // There might also be an extra extend between the mul and the addreduce, so
17174 // long as the bitwidth is high enough to make them equivalent (for example
17175 // the original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17176 if (ResVT != RetTy)
17177 return false;
17178 SDValue Mul = N0;
17179 if (Mul->getOpcode() == ExtendCode &&
17180 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17181 ResVT.getScalarSizeInBits())
17182 Mul = Mul->getOperand(0);
17183 if (Mul->getOpcode() != ISD::MUL)
17184 return false;
17185 SDValue ExtA = Mul->getOperand(0);
17186 SDValue ExtB = Mul->getOperand(1);
17187 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17188 return false;
17189 A = ExtA->getOperand(0);
17190 B = ExtB->getOperand(0);
17191 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17192 A = ExtendIfNeeded(A, ExtendCode);
17193 B = ExtendIfNeeded(B, ExtendCode);
17194 return true;
17195 }
17196 return false;
17197 };
17198 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17199 SDValue &A, SDValue &B, SDValue &Mask) {
17200 // Same as the pattern above with a select for the zero predicated lanes
17201 // ExtA = sext/zext A
17202 // ExtB = sext/zext B
17203 // Mul = mul ExtA, ExtB
17204 // N0 = select Mask, Mul, 0
17205 // vecreduce.add N0
17206 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17207 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17208 return false;
17209 Mask = N0->getOperand(0);
17210 SDValue Mul = N0->getOperand(1);
17211 if (Mul->getOpcode() == ExtendCode &&
17212 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17213 ResVT.getScalarSizeInBits())
17214 Mul = Mul->getOperand(0);
17215 if (Mul->getOpcode() != ISD::MUL)
17216 return false;
17217 SDValue ExtA = Mul->getOperand(0);
17218 SDValue ExtB = Mul->getOperand(1);
17219 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17220 return false;
17221 A = ExtA->getOperand(0);
17222 B = ExtB->getOperand(0);
17223 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17224 A = ExtendIfNeeded(A, ExtendCode);
17225 B = ExtendIfNeeded(B, ExtendCode);
17226 return true;
17227 }
17228 return false;
17229 };
17230 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17231 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17232 // reductions. The operands are extended with MVEEXT, but as they are
17233 // reductions the lane orders do not matter. MVEEXT may be combined with
17234 // loads to produce two extending loads, or else they will be expanded to
17235 // VREV/VMOVL.
17236 EVT VT = Ops[0].getValueType();
17237 if (VT == MVT::v16i8) {
17238 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17239 "Unexpected illegal long reduction opcode");
17240 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17241
17242 SDValue Ext0 =
17243 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17244 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17245 SDValue Ext1 =
17246 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17247 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17248
17249 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17250 Ext0, Ext1);
17251 SDValue MLA1 =
17252 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17253 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17254 Ext0.getValue(1), Ext1.getValue(1));
17255 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17256 }
17257 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17258 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17259 SDValue(Node.getNode(), 1));
17260 };
17261
17262 SDValue A, B;
17263 SDValue Mask;
17264 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17265 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17266 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17267 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17268 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17269 A, B))
17270 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17271 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17272 A, B))
17273 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17274 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17275 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17276 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17277 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17278 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17279 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17280
17281 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17282 Mask))
17283 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17284 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17285 Mask))
17286 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17287 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17288 Mask))
17289 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17290 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17291 Mask))
17292 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17293 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17294 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17295 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17296 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17297 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17298 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17299
17300 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17301 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17302 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17303 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17304 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17305 return Create64bitNode(ARMISD::VADDLVs, {A});
17306 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17307 return Create64bitNode(ARMISD::VADDLVu, {A});
17308 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17309 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17310 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17311 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17312 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17313 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17314
17315 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17316 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17317 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17318 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17319 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17320 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17321 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17322 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17323 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17324 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17325 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17326 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17327 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17328 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17329
17330 // Some complications. We can get a case where the two inputs of the mul are
17331 // the same; in that case the output sext will have been helpfully converted
17332 // to a zext. Turn it back.
17333 SDValue Op = N0;
17334 if (Op->getOpcode() == ISD::VSELECT)
17335 Op = Op->getOperand(1);
17336 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17337 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17338 SDValue Mul = Op->getOperand(0);
17339 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17340 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17341 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17342 if (Op != N0)
17343 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17344 N0->getOperand(0), Ext, N0->getOperand(2));
17345 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17346 }
17347 }
17348
17349 return SDValue();
17350}
17351
17352// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17353// the lanes are used. Due to the reduction being commutative the shuffle can be
17354// removed.
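// For example (a sketch): VADDV(shuffle<3,2,1,0>(x)) adds up exactly the same
// lanes as VADDV(x), so the shuffle can be dropped; the same applies to VMLAV
// when both of its vector operands are shuffled by the identical mask.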
17356 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17357 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17358 if (!Shuf || !Shuf->getOperand(1).isUndef())
17359 return SDValue();
17360
17361 // Check all elements are used once in the mask.
17362 ArrayRef<int> Mask = Shuf->getMask();
17363 APInt SetElts(Mask.size(), 0);
17364 for (int E : Mask) {
17365 if (E < 0 || E >= (int)Mask.size())
17366 return SDValue();
17367 SetElts.setBit(E);
17368 }
17369 if (!SetElts.isAllOnes())
17370 return SDValue();
17371
17372 if (N->getNumOperands() != VecOp + 1) {
17373 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17374 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17375 return SDValue();
17376 }
17377
17378 SmallVector<SDValue, 4> Ops;
17379 for (SDValue Op : N->ops()) {
17380 if (Op.getValueType().isVector())
17381 Ops.push_back(Op.getOperand(0));
17382 else
17383 Ops.push_back(Op);
17384 }
17385 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17386}
17387
17390 SDValue Op0 = N->getOperand(0);
17391 SDValue Op1 = N->getOperand(1);
17392 unsigned IsTop = N->getConstantOperandVal(2);
17393
17394 // VMOVNt(a, undef) -> a
17395 // VMOVNb(a, undef) -> a
17396 // VMOVNb(undef, a) -> a
17397 if (Op1->isUndef())
17398 return Op0;
17399 if (Op0->isUndef() && !IsTop)
17400 return Op1;
17401
17402 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17403 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17404 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17405 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17406 Op1->getConstantOperandVal(2) == 0)
17407 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17408 Op0, Op1->getOperand(1), N->getOperand(2));
17409
17410 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17411 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17412 // into the top or bottom lanes.
17413 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17414 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17415 APInt Op0DemandedElts =
17416 IsTop ? Op1DemandedElts
17417 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17418
17419 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17420 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17421 return SDValue(N, 0);
17422 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17423 return SDValue(N, 0);
17424
17425 return SDValue();
17426}
17427
17430 SDValue Op0 = N->getOperand(0);
17431 unsigned IsTop = N->getConstantOperandVal(2);
17432
17433 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17434 APInt Op0DemandedElts =
17435 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17436 : APInt::getHighBitsSet(2, 1));
17437
17438 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17439 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17440 return SDValue(N, 0);
17441 return SDValue();
17442}
17443
17446 EVT VT = N->getValueType(0);
17447 SDValue LHS = N->getOperand(0);
17448 SDValue RHS = N->getOperand(1);
17449
17450 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17451 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17452 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17453 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17454 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17455 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17456 SDLoc DL(N);
17457 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17458 LHS.getOperand(0), RHS.getOperand(0));
17459 SDValue UndefV = LHS.getOperand(1);
17460 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17461 }
17462 return SDValue();
17463}
17464
17466 SDLoc DL(N);
17467 SDValue Op0 = N->getOperand(0);
17468 SDValue Op1 = N->getOperand(1);
17469
17470 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17471 // uses of the intrinsics.
17472 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17473 int ShiftAmt = C->getSExtValue();
17474 if (ShiftAmt == 0) {
17475 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17476 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17477 return SDValue();
17478 }
17479
17480 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17481 unsigned NewOpcode =
17482 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17483 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17484 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17485 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17486 return NewShift;
17487 }
17488 }
17489
17490 return SDValue();
17491}
17492
17493/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17494 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17495 DAGCombinerInfo &DCI) const {
17496 SelectionDAG &DAG = DCI.DAG;
17497 unsigned IntNo = N->getConstantOperandVal(0);
17498 switch (IntNo) {
17499 default:
17500 // Don't do anything for most intrinsics.
17501 break;
17502
17503 // Vector shifts: check for immediate versions and lower them.
17504 // Note: This is done during DAG combining instead of DAG legalizing because
17505 // the build_vectors for 64-bit vector element shift counts are generally
17506 // not legal, and it is hard to see their values after they get legalized to
17507 // loads from a constant pool.
17508 case Intrinsic::arm_neon_vshifts:
17509 case Intrinsic::arm_neon_vshiftu:
17510 case Intrinsic::arm_neon_vrshifts:
17511 case Intrinsic::arm_neon_vrshiftu:
17512 case Intrinsic::arm_neon_vrshiftn:
17513 case Intrinsic::arm_neon_vqshifts:
17514 case Intrinsic::arm_neon_vqshiftu:
17515 case Intrinsic::arm_neon_vqshiftsu:
17516 case Intrinsic::arm_neon_vqshiftns:
17517 case Intrinsic::arm_neon_vqshiftnu:
17518 case Intrinsic::arm_neon_vqshiftnsu:
17519 case Intrinsic::arm_neon_vqrshiftns:
17520 case Intrinsic::arm_neon_vqrshiftnu:
17521 case Intrinsic::arm_neon_vqrshiftnsu: {
17522 EVT VT = N->getOperand(1).getValueType();
17523 int64_t Cnt;
17524 unsigned VShiftOpc = 0;
17525
17526 switch (IntNo) {
17527 case Intrinsic::arm_neon_vshifts:
17528 case Intrinsic::arm_neon_vshiftu:
17529 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17530 VShiftOpc = ARMISD::VSHLIMM;
17531 break;
17532 }
17533 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17534 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17535 : ARMISD::VSHRuIMM);
17536 break;
17537 }
17538 return SDValue();
17539
17540 case Intrinsic::arm_neon_vrshifts:
17541 case Intrinsic::arm_neon_vrshiftu:
17542 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17543 break;
17544 return SDValue();
17545
17546 case Intrinsic::arm_neon_vqshifts:
17547 case Intrinsic::arm_neon_vqshiftu:
17548 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17549 break;
17550 return SDValue();
17551
17552 case Intrinsic::arm_neon_vqshiftsu:
17553 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17554 break;
17555 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17556
17557 case Intrinsic::arm_neon_vrshiftn:
17558 case Intrinsic::arm_neon_vqshiftns:
17559 case Intrinsic::arm_neon_vqshiftnu:
17560 case Intrinsic::arm_neon_vqshiftnsu:
17561 case Intrinsic::arm_neon_vqrshiftns:
17562 case Intrinsic::arm_neon_vqrshiftnu:
17563 case Intrinsic::arm_neon_vqrshiftnsu:
17564 // Narrowing shifts require an immediate right shift.
17565 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17566 break;
17567 llvm_unreachable("invalid shift count for narrowing vector shift "
17568 "intrinsic");
17569
17570 default:
17571 llvm_unreachable("unhandled vector shift");
17572 }
17573
17574 switch (IntNo) {
17575 case Intrinsic::arm_neon_vshifts:
17576 case Intrinsic::arm_neon_vshiftu:
17577 // Opcode already set above.
17578 break;
17579 case Intrinsic::arm_neon_vrshifts:
17580 VShiftOpc = ARMISD::VRSHRsIMM;
17581 break;
17582 case Intrinsic::arm_neon_vrshiftu:
17583 VShiftOpc = ARMISD::VRSHRuIMM;
17584 break;
17585 case Intrinsic::arm_neon_vrshiftn:
17586 VShiftOpc = ARMISD::VRSHRNIMM;
17587 break;
17588 case Intrinsic::arm_neon_vqshifts:
17589 VShiftOpc = ARMISD::VQSHLsIMM;
17590 break;
17591 case Intrinsic::arm_neon_vqshiftu:
17592 VShiftOpc = ARMISD::VQSHLuIMM;
17593 break;
17594 case Intrinsic::arm_neon_vqshiftsu:
17595 VShiftOpc = ARMISD::VQSHLsuIMM;
17596 break;
17597 case Intrinsic::arm_neon_vqshiftns:
17598 VShiftOpc = ARMISD::VQSHRNsIMM;
17599 break;
17600 case Intrinsic::arm_neon_vqshiftnu:
17601 VShiftOpc = ARMISD::VQSHRNuIMM;
17602 break;
17603 case Intrinsic::arm_neon_vqshiftnsu:
17604 VShiftOpc = ARMISD::VQSHRNsuIMM;
17605 break;
17606 case Intrinsic::arm_neon_vqrshiftns:
17607 VShiftOpc = ARMISD::VQRSHRNsIMM;
17608 break;
17609 case Intrinsic::arm_neon_vqrshiftnu:
17610 VShiftOpc = ARMISD::VQRSHRNuIMM;
17611 break;
17612 case Intrinsic::arm_neon_vqrshiftnsu:
17613 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17614 break;
17615 }
17616
17617 SDLoc dl(N);
17618 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17619 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17620 }
17621
17622 case Intrinsic::arm_neon_vshiftins: {
17623 EVT VT = N->getOperand(1).getValueType();
17624 int64_t Cnt;
17625 unsigned VShiftOpc = 0;
17626
17627 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17628 VShiftOpc = ARMISD::VSLIIMM;
17629 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17630 VShiftOpc = ARMISD::VSRIIMM;
17631 else {
17632 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17633 }
17634
17635 SDLoc dl(N);
17636 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17637 N->getOperand(1), N->getOperand(2),
17638 DAG.getConstant(Cnt, dl, MVT::i32));
17639 }
17640
17641 case Intrinsic::arm_neon_vqrshifts:
17642 case Intrinsic::arm_neon_vqrshiftu:
17643 // No immediate versions of these to check for.
17644 break;
17645
17646 case Intrinsic::arm_mve_vqdmlah:
17647 case Intrinsic::arm_mve_vqdmlash:
17648 case Intrinsic::arm_mve_vqrdmlah:
17649 case Intrinsic::arm_mve_vqrdmlash:
17650 case Intrinsic::arm_mve_vmla_n_predicated:
17651 case Intrinsic::arm_mve_vmlas_n_predicated:
17652 case Intrinsic::arm_mve_vqdmlah_predicated:
17653 case Intrinsic::arm_mve_vqdmlash_predicated:
17654 case Intrinsic::arm_mve_vqrdmlah_predicated:
17655 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17656 // These intrinsics all take an i32 scalar operand which is narrowed to the
17657 // size of a single lane of the vector type they return. So we don't need
17658 // any bits of that operand above that point, which allows us to eliminate
17659 // uxth/sxth.
17660 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17661 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17662 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17663 return SDValue();
17664 break;
17665 }
17666
17667 case Intrinsic::arm_mve_minv:
17668 case Intrinsic::arm_mve_maxv:
17669 case Intrinsic::arm_mve_minav:
17670 case Intrinsic::arm_mve_maxav:
17671 case Intrinsic::arm_mve_minv_predicated:
17672 case Intrinsic::arm_mve_maxv_predicated:
17673 case Intrinsic::arm_mve_minav_predicated:
17674 case Intrinsic::arm_mve_maxav_predicated: {
17675 // These intrinsics all take an i32 scalar operand which is narrowed to the
17676 // size of a single lane of the vector type they take as the other input.
17677 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17678 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17679 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17680 return SDValue();
17681 break;
17682 }
17683
17684 case Intrinsic::arm_mve_addv: {
17685 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17686 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17687 bool Unsigned = N->getConstantOperandVal(2);
17688 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17689 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17690 }
17691
17692 case Intrinsic::arm_mve_addlv:
17693 case Intrinsic::arm_mve_addlv_predicated: {
17694 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17695 // which recombines the two outputs into an i64
17696 bool Unsigned = N->getConstantOperandVal(2);
17697 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17698 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17699 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17700
17701 SmallVector<SDValue, 4> Ops;
17702 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17703 if (i != 2) // skip the unsigned flag
17704 Ops.push_back(N->getOperand(i));
17705
17706 SDLoc dl(N);
17707 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17708 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17709 val.getValue(1));
17710 }
17711 }
17712
17713 return SDValue();
17714}
17715
17716/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17717/// lowers them. As with the vector shift intrinsics, this is done during DAG
17718/// combining instead of DAG legalizing because the build_vectors for 64-bit
17719/// vector element shift counts are generally not legal, and it is hard to see
17720/// their values after they get legalized to loads from a constant pool.
17721 static SDValue PerformShiftCombine(SDNode *N,
17722 TargetLowering::DAGCombinerInfo &DCI,
17723 const ARMSubtarget *ST) {
17724 SelectionDAG &DAG = DCI.DAG;
17725 EVT VT = N->getValueType(0);
17726
17727 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17728 N->getOperand(0)->getOpcode() == ISD::AND &&
17729 N->getOperand(0)->hasOneUse()) {
17730 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17731 return SDValue();
17732 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17733 // usually show up because instcombine prefers to canonicalize it to
17734 // (and (shl x, ShiftAmt), (shl AndMask, ShiftAmt)), but the shift can come
17735 // out of GEP lowering in some cases.
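// A worked example (values chosen for illustration): for
//   (shl (and x, 0x3ff), 2)
// the mask has MaskedBits = 22 leading zero bits, so the combine emits
//   (srl (shl x, 22), 20)
// which clears the masked-off bits using two shifts that Thumb1 encodes
// cheaply, instead of materializing the AND mask.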
17736 SDValue N0 = N->getOperand(0);
17737 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17738 if (!ShiftAmtNode)
17739 return SDValue();
17740 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17741 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17742 if (!AndMaskNode)
17743 return SDValue();
17744 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17745 // Don't transform uxtb/uxth.
17746 if (AndMask == 255 || AndMask == 65535)
17747 return SDValue();
17748 if (isMask_32(AndMask)) {
17749 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17750 if (MaskedBits > ShiftAmt) {
17751 SDLoc DL(N);
17752 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17753 DAG.getConstant(MaskedBits, DL, MVT::i32));
17754 return DAG.getNode(
17755 ISD::SRL, DL, MVT::i32, SHL,
17756 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17757 }
17758 }
17759 }
17760
17761 // Nothing to be done for scalar shifts.
17762 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17763 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17764 return SDValue();
17765 if (ST->hasMVEIntegerOps())
17766 return SDValue();
17767
17768 int64_t Cnt;
17769
17770 switch (N->getOpcode()) {
17771 default: llvm_unreachable("unexpected shift opcode");
17772
17773 case ISD::SHL:
17774 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17775 SDLoc dl(N);
17776 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17777 DAG.getConstant(Cnt, dl, MVT::i32));
17778 }
17779 break;
17780
17781 case ISD::SRA:
17782 case ISD::SRL:
17783 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17784 unsigned VShiftOpc =
17785 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17786 SDLoc dl(N);
17787 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17788 DAG.getConstant(Cnt, dl, MVT::i32));
17789 }
17790 }
17791 return SDValue();
17792}
17793
17794 // Look for a sign, zero or fp extend of a larger than legal load. This can be
17795// split into multiple extending loads, which are simpler to deal with than an
17796// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17797// to convert the type to an f32.
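// For example, a zext of a v8i8 load to v8i32 becomes two v4i32 zero-extending
// loads of consecutive v4i8 halves that are concatenated back together, and an
// fpext of a v8f16 load to v8f32 uses integer extending loads followed by VCVTL.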
17798 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17799 SDValue N0 = N->getOperand(0);
17800 if (N0.getOpcode() != ISD::LOAD)
17801 return SDValue();
17802 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17803 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17804 LD->getExtensionType() != ISD::NON_EXTLOAD)
17805 return SDValue();
17806 EVT FromVT = LD->getValueType(0);
17807 EVT ToVT = N->getValueType(0);
17808 if (!ToVT.isVector())
17809 return SDValue();
17811 EVT ToEltVT = ToVT.getVectorElementType();
17812 EVT FromEltVT = FromVT.getVectorElementType();
17813
17814 unsigned NumElements = 0;
17815 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17816 NumElements = 4;
17817 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17818 NumElements = 4;
17819 if (NumElements == 0 ||
17820 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17821 FromVT.getVectorNumElements() % NumElements != 0 ||
17822 !isPowerOf2_32(NumElements))
17823 return SDValue();
17824
17825 LLVMContext &C = *DAG.getContext();
17826 SDLoc DL(LD);
17827 // Details about the old load
17828 SDValue Ch = LD->getChain();
17829 SDValue BasePtr = LD->getBasePtr();
17830 Align Alignment = LD->getOriginalAlign();
17831 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17832 AAMDNodes AAInfo = LD->getAAInfo();
17833
17834 ISD::LoadExtType NewExtType =
17835 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17836 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17837 EVT NewFromVT = EVT::getVectorVT(
17838 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17839 EVT NewToVT = EVT::getVectorVT(
17840 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17841
17842 SmallVector<SDValue, 4> Loads;
17843 SmallVector<SDValue, 4> Chains;
17844 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17845 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17846 SDValue NewPtr =
17847 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17848
17849 SDValue NewLoad =
17850 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17851 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17852 Alignment, MMOFlags, AAInfo);
17853 Loads.push_back(NewLoad);
17854 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17855 }
17856
17857 // Float truncs need to be extended with VCVTB's into their floating point types.
17858 if (FromEltVT == MVT::f16) {
17859 SmallVector<SDValue, 4> Extends;
17860
17861 for (unsigned i = 0; i < Loads.size(); i++) {
17862 SDValue LoadBC =
17863 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17864 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17865 DAG.getConstant(0, DL, MVT::i32));
17866 Extends.push_back(FPExt);
17867 }
17868
17869 Loads = Extends;
17870 }
17871
17872 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17873 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17874 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17875}
17876
17877/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17878/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17879 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17880 const ARMSubtarget *ST) {
17881 SDValue N0 = N->getOperand(0);
17882
17883 // Check for sign- and zero-extensions of vector extract operations of 8- and
17884 // 16-bit vector elements. NEON and MVE support these directly. They are
17885 // handled during DAG combining because type legalization will promote them
17886 // to 32-bit types and it is messy to recognize the operations after that.
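// For example, (i32 sext (extract_elt v8i16:v, lane)) becomes
// (VGETLANEs v, lane), which can be selected as a single sign-extending
// lane-move instruction.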
17887 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17888 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17889 SDValue Vec = N0.getOperand(0);
17890 SDValue Lane = N0.getOperand(1);
17891 EVT VT = N->getValueType(0);
17892 EVT EltVT = N0.getValueType();
17893 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17894
17895 if (VT == MVT::i32 &&
17896 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17897 TLI.isTypeLegal(Vec.getValueType()) &&
17898 isa<ConstantSDNode>(Lane)) {
17899
17900 unsigned Opc = 0;
17901 switch (N->getOpcode()) {
17902 default: llvm_unreachable("unexpected opcode");
17903 case ISD::SIGN_EXTEND:
17904 Opc = ARMISD::VGETLANEs;
17905 break;
17906 case ISD::ZERO_EXTEND:
17907 case ISD::ANY_EXTEND:
17908 Opc = ARMISD::VGETLANEu;
17909 break;
17910 }
17911 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17912 }
17913 }
17914
17915 if (ST->hasMVEIntegerOps())
17916 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17917 return NewLoad;
17918
17919 return SDValue();
17920}
17921
17922 static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17923 const ARMSubtarget *ST) {
17924 if (ST->hasMVEFloatOps())
17925 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17926 return NewLoad;
17927
17928 return SDValue();
17929}
17930
17931// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17932// constant bounds.
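// For example, smin(smax(x, -128), 127) becomes an SSAT saturating x to 8
// signed bits, and smin(smax(x, 0), 255) becomes a USAT saturating x to 8
// unsigned bits.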
17933 static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17934 const ARMSubtarget *Subtarget) {
17935 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17936 !Subtarget->isThumb2())
17937 return SDValue();
17938
17939 EVT VT = Op.getValueType();
17940 SDValue Op0 = Op.getOperand(0);
17941
17942 if (VT != MVT::i32 ||
17943 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17944 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17945 !isa<ConstantSDNode>(Op0.getOperand(1)))
17946 return SDValue();
17947
17948 SDValue Min = Op;
17949 SDValue Max = Op0;
17950 SDValue Input = Op0.getOperand(0);
17951 if (Min.getOpcode() == ISD::SMAX)
17952 std::swap(Min, Max);
17953
17954 APInt MinC = Min.getConstantOperandAPInt(1);
17955 APInt MaxC = Max.getConstantOperandAPInt(1);
17956
17957 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17958 !(MinC + 1).isPowerOf2())
17959 return SDValue();
17960
17961 SDLoc DL(Op);
17962 if (MinC == ~MaxC)
17963 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17964 DAG.getConstant(MinC.countr_one(), DL, VT));
17965 if (MaxC == 0)
17966 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17967 DAG.getConstant(MinC.countr_one(), DL, VT));
17968
17969 return SDValue();
17970}
17971
17972/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17973/// saturates.
17974 static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17975 const ARMSubtarget *ST) {
17976 EVT VT = N->getValueType(0);
17977 SDValue N0 = N->getOperand(0);
17978
17979 if (VT == MVT::i32)
17980 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17981
17982 if (!ST->hasMVEIntegerOps())
17983 return SDValue();
17984
17985 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17986 return V;
17987
17988 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17989 return SDValue();
17990
17991 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17992 // Check one is a smin and the other is a smax
17993 if (Min->getOpcode() != ISD::SMIN)
17994 std::swap(Min, Max);
17995 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
17996 return false;
17997
17998 APInt SaturateC;
17999 if (VT == MVT::v4i32)
18000 SaturateC = APInt(32, (1 << 15) - 1, true);
18001 else //if (VT == MVT::v8i16)
18002 SaturateC = APInt(16, (1 << 7) - 1, true);
18003
18004 APInt MinC, MaxC;
18005 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18006 MinC != SaturateC)
18007 return false;
18008 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18009 MaxC != ~SaturateC)
18010 return false;
18011 return true;
18012 };
18013
18014 if (IsSignedSaturate(N, N0.getNode())) {
18015 SDLoc DL(N);
18016 MVT ExtVT, HalfVT;
18017 if (VT == MVT::v4i32) {
18018 HalfVT = MVT::v8i16;
18019 ExtVT = MVT::v4i16;
18020 } else { // if (VT == MVT::v8i16)
18021 HalfVT = MVT::v16i8;
18022 ExtVT = MVT::v8i8;
18023 }
18024
18025 // Create a VQMOVNB with undef top lanes, then sign-extend into the top
18026 // half. That extend will hopefully be removed if only the bottom bits are
18027 // demanded (through a truncating store, for example).
18028 SDValue VQMOVN =
18029 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18030 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18031 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18032 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18033 DAG.getValueType(ExtVT));
18034 }
18035
18036 auto IsUnsignedSaturate = [&](SDNode *Min) {
18037 // For unsigned, we just need to check for <= 0xffff
18038 if (Min->getOpcode() != ISD::UMIN)
18039 return false;
18040
18041 APInt SaturateC;
18042 if (VT == MVT::v4i32)
18043 SaturateC = APInt(32, (1 << 16) - 1, true);
18044 else //if (VT == MVT::v8i16)
18045 SaturateC = APInt(16, (1 << 8) - 1, true);
18046
18047 APInt MinC;
18048 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18049 MinC != SaturateC)
18050 return false;
18051 return true;
18052 };
18053
18054 if (IsUnsignedSaturate(N)) {
18055 SDLoc DL(N);
18056 MVT HalfVT;
18057 unsigned ExtConst;
18058 if (VT == MVT::v4i32) {
18059 HalfVT = MVT::v8i16;
18060 ExtConst = 0x0000FFFF;
18061 } else { //if (VT == MVT::v8i16)
18062 HalfVT = MVT::v16i8;
18063 ExtConst = 0x00FF;
18064 }
18065
18066 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18067 // an AND. That extend will hopefully be removed if only the bottom bits are
18068 // demanded (through a truncating store, for example).
18069 SDValue VQMOVN =
18070 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18071 DAG.getConstant(0, DL, MVT::i32));
18072 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18073 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18074 DAG.getConstant(ExtConst, DL, VT));
18075 }
18076
18077 return SDValue();
18078}
18079
18080 static const APInt *isPowerOf2Constant(SDValue V) {
18081 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18082 if (!C)
18083 return nullptr;
18084 const APInt *CV = &C->getAPIntValue();
18085 return CV->isPowerOf2() ? CV : nullptr;
18086}
18087
18088 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18089 // If we have a CMOV, OR and AND combination such as:
18090 // if (x & CN)
18091 // y |= CM;
18092 //
18093 // And:
18094 // * CN is a single bit;
18095 // * All bits covered by CM are known zero in y
18096 //
18097 // Then we can convert this into a sequence of BFI instructions. This will
18098 // always be a win if CM is a single bit, will always be no worse than the
18099 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18100 // three bits (due to the extra IT instruction).
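// For example, for 'if (x & 0x10) y |= 0x06;' the tested bit of x is shifted
// down to bit 0 and then copied into bits 1 and 2 of y with two BFIs; this is
// only valid because those bits of y are known to be zero beforehand.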
18101
18102 SDValue Op0 = CMOV->getOperand(0);
18103 SDValue Op1 = CMOV->getOperand(1);
18104 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18105 SDValue CmpZ = CMOV->getOperand(4);
18106
18107 // The compare must be against zero.
18108 if (!isNullConstant(CmpZ->getOperand(1)))
18109 return SDValue();
18110
18111 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18112 SDValue And = CmpZ->getOperand(0);
18113 if (And->getOpcode() != ISD::AND)
18114 return SDValue();
18115 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18116 if (!AndC)
18117 return SDValue();
18118 SDValue X = And->getOperand(0);
18119
18120 if (CC == ARMCC::EQ) {
18121 // We're performing an "equal to zero" compare. Swap the operands so we
18122 // canonicalize on a "not equal to zero" compare.
18123 std::swap(Op0, Op1);
18124 } else {
18125 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18126 }
18127
18128 if (Op1->getOpcode() != ISD::OR)
18129 return SDValue();
18130
18131 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18132 if (!OrC)
18133 return SDValue();
18134 SDValue Y = Op1->getOperand(0);
18135
18136 if (Op0 != Y)
18137 return SDValue();
18138
18139 // Now, is it profitable to continue?
18140 APInt OrCI = OrC->getAPIntValue();
18141 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18142 if (OrCI.popcount() > Heuristic)
18143 return SDValue();
18144
18145 // Lastly, can we determine that the bits defined by OrCI
18146 // are zero in Y?
18147 KnownBits Known = DAG.computeKnownBits(Y);
18148 if ((OrCI & Known.Zero) != OrCI)
18149 return SDValue();
18150
18151 // OK, we can do the combine.
18152 SDValue V = Y;
18153 SDLoc dl(X);
18154 EVT VT = X.getValueType();
18155 unsigned BitInX = AndC->logBase2();
18156
18157 if (BitInX != 0) {
18158 // We must shift X first.
18159 X = DAG.getNode(ISD::SRL, dl, VT, X,
18160 DAG.getConstant(BitInX, dl, VT));
18161 }
18162
18163 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18164 BitInY < NumActiveBits; ++BitInY) {
18165 if (OrCI[BitInY] == 0)
18166 continue;
18167 APInt Mask(VT.getSizeInBits(), 0);
18168 Mask.setBit(BitInY);
18169 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18170 // Confusingly, the operand is an *inverted* mask.
18171 DAG.getConstant(~Mask, dl, VT));
18172 }
18173
18174 return V;
18175}
18176
18177// Given N, the value controlling the conditional branch, search for the loop
18178// intrinsic, returning it, along with how the value is used. We need to handle
18179// patterns such as the following:
18180// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18181// (brcond (setcc (loop.decrement), 0, eq), exit)
18182// (brcond (setcc (loop.decrement), 0, ne), header)
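// The XOR-by-one and setcc-against-0/1 wrappers are peeled off below, with any
// logical negation recorded in Negate, until the intrinsic itself is reached.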
18183 static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18184 bool &Negate) {
18185 switch (N->getOpcode()) {
18186 default:
18187 break;
18188 case ISD::XOR: {
18189 if (!isa<ConstantSDNode>(N.getOperand(1)))
18190 return SDValue();
18191 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18192 return SDValue();
18193 Negate = !Negate;
18194 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18195 }
18196 case ISD::SETCC: {
18197 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18198 if (!Const)
18199 return SDValue();
18200 if (Const->isZero())
18201 Imm = 0;
18202 else if (Const->isOne())
18203 Imm = 1;
18204 else
18205 return SDValue();
18206 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18207 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18208 }
18209 case ISD::INTRINSIC_W_CHAIN: {
18210 unsigned IntOp = N.getConstantOperandVal(1);
18211 if (IntOp != Intrinsic::test_start_loop_iterations &&
18212 IntOp != Intrinsic::loop_decrement_reg)
18213 return SDValue();
18214 return N;
18215 }
18216 }
18217 return SDValue();
18218}
18219
18220 static SDValue PerformHWLoopCombine(SDNode *N,
18221 TargetLowering::DAGCombinerInfo &DCI,
18222 const ARMSubtarget *ST) {
18223
18224 // The hwloop intrinsics that we're interested in are used for control flow,
18225 // either for entering or exiting the loop:
18226 // - test.start.loop.iterations will test whether its operand is zero. If it
18227 // is zero, the proceeding branch should not enter the loop.
18228 // - loop.decrement.reg also tests whether its operand is zero. If it is
18229 // zero, the proceeding branch should not branch back to the beginning of
18230 // the loop.
18231 // So here, we need to check how the brcond is using the result of each
18232 // of the intrinsics to ensure that we're branching to the right place at the
18233 // right time.
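// The branch is rewritten in terms of ARMISD::WLS, which branches to the exit
// block when the initial count is zero, and ARMISD::LOOP_DEC/ARMISD::LE, which
// decrement the count and branch back to the header while it is non-zero.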
18234
18235 ISD::CondCode CC = ISD::SETEQ;
18236 SDValue Cond;
18237 int Imm = 1;
18238 bool Negate = false;
18239 SDValue Chain = N->getOperand(0);
18240 SDValue Dest;
18241
18242 if (N->getOpcode() == ISD::BRCOND) {
18243 CC = ISD::SETEQ;
18244 Cond = N->getOperand(1);
18245 Dest = N->getOperand(2);
18246 } else {
18247 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18248 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18249 Cond = N->getOperand(2);
18250 Dest = N->getOperand(4);
18251 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18252 if (!Const->isOne() && !Const->isZero())
18253 return SDValue();
18254 Imm = Const->getZExtValue();
18255 } else
18256 return SDValue();
18257 }
18258
18259 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18260 if (!Int)
18261 return SDValue();
18262
18263 if (Negate)
18264 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18265
18266 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18267 return (CC == ISD::SETEQ && Imm == 0) ||
18268 (CC == ISD::SETNE && Imm == 1) ||
18269 (CC == ISD::SETLT && Imm == 1) ||
18270 (CC == ISD::SETULT && Imm == 1);
18271 };
18272
18273 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18274 return (CC == ISD::SETEQ && Imm == 1) ||
18275 (CC == ISD::SETNE && Imm == 0) ||
18276 (CC == ISD::SETGT && Imm == 0) ||
18277 (CC == ISD::SETUGT && Imm == 0) ||
18278 (CC == ISD::SETGE && Imm == 1) ||
18279 (CC == ISD::SETUGE && Imm == 1);
18280 };
18281
18282 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18283 "unsupported condition");
18284
18285 SDLoc dl(Int);
18286 SelectionDAG &DAG = DCI.DAG;
18287 SDValue Elements = Int.getOperand(2);
18288 unsigned IntOp = Int->getConstantOperandVal(1);
18289 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18290 && "expected single br user");
18291 SDNode *Br = *N->use_begin();
18292 SDValue OtherTarget = Br->getOperand(1);
18293
18294 // Update the unconditional branch to branch to the given Dest.
18295 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18296 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18297 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18298 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18299 };
18300
18301 if (IntOp == Intrinsic::test_start_loop_iterations) {
18302 SDValue Res;
18303 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18304 // We expect this 'instruction' to branch when the counter is zero.
18305 if (IsTrueIfZero(CC, Imm)) {
18306 SDValue Ops[] = {Chain, Setup, Dest};
18307 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18308 } else {
18309 // The logic is the reverse of what we need for WLS, so find the other
18310 // basic block target: the target of the proceeding br.
18311 UpdateUncondBr(Br, Dest, DAG);
18312
18313 SDValue Ops[] = {Chain, Setup, OtherTarget};
18314 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18315 }
18316 // Update LR count to the new value
18317 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18318 // Update chain
18319 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18320 return Res;
18321 } else {
18322 SDValue Size =
18323 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18324 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18325 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18326 DAG.getVTList(MVT::i32, MVT::Other), Args);
18327 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18328
18329 // We expect this instruction to branch when the count is not zero.
18330 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18331
18332 // Update the unconditional branch to target the loop preheader if we've
18333 // found the condition has been reversed.
18334 if (Target == OtherTarget)
18335 UpdateUncondBr(Br, Dest, DAG);
18336
18337 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18338 SDValue(LoopDec.getNode(), 1), Chain);
18339
18340 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18341 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18342 }
18343 return SDValue();
18344}
18345
18346/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18347SDValue
18348 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18349 SDValue Cmp = N->getOperand(4);
18350 if (Cmp.getOpcode() != ARMISD::CMPZ)
18351 // Only looking at NE cases.
18352 return SDValue();
18353
18354 EVT VT = N->getValueType(0);
18355 SDLoc dl(N);
18356 SDValue LHS = Cmp.getOperand(0);
18357 SDValue RHS = Cmp.getOperand(1);
18358 SDValue Chain = N->getOperand(0);
18359 SDValue BB = N->getOperand(1);
18360 SDValue ARMcc = N->getOperand(2);
18361 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18362
18363 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18364 // -> (brcond Chain BB CC CPSR Cmp)
18365 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18366 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18367 LHS->getOperand(0)->hasOneUse() &&
18368 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18369 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18370 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18371 return DAG.getNode(
18372 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18373 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18374 }
18375
18376 return SDValue();
18377}
18378
18379/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18380SDValue
18381 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18382 SDValue Cmp = N->getOperand(4);
18383 if (Cmp.getOpcode() != ARMISD::CMPZ)
18384 // Only looking at EQ and NE cases.
18385 return SDValue();
18386
18387 EVT VT = N->getValueType(0);
18388 SDLoc dl(N);
18389 SDValue LHS = Cmp.getOperand(0);
18390 SDValue RHS = Cmp.getOperand(1);
18391 SDValue FalseVal = N->getOperand(0);
18392 SDValue TrueVal = N->getOperand(1);
18393 SDValue ARMcc = N->getOperand(2);
18394 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18395
18396 // BFI is only available on V6T2+.
18397 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18398 SDValue R = PerformCMOVToBFICombine(N, DAG);
18399 if (R)
18400 return R;
18401 }
18402
18403 // Simplify
18404 // mov r1, r0
18405 // cmp r1, x
18406 // mov r0, y
18407 // moveq r0, x
18408 // to
18409 // cmp r0, x
18410 // movne r0, y
18411 //
18412 // mov r1, r0
18413 // cmp r1, x
18414 // mov r0, x
18415 // movne r0, y
18416 // to
18417 // cmp r0, x
18418 // movne r0, y
18419 /// FIXME: Turn this into a target neutral optimization?
18420 SDValue Res;
18421 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18422 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18423 N->getOperand(3), Cmp);
18424 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18425 SDValue ARMcc;
18426 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18427 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18428 N->getOperand(3), NewCmp);
18429 }
18430
18431 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18432 // -> (cmov F T CC CPSR Cmp)
18433 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18434 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18435 isNullConstant(RHS)) {
18436 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18437 LHS->getOperand(2), LHS->getOperand(3),
18438 LHS->getOperand(4));
18439 }
18440
18441 if (!VT.isInteger())
18442 return SDValue();
18443
18444 // Fold away an unnecessary CMPZ/CMOV
18445 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18446 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18447 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18448 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18449 N->getConstantOperandVal(2) == ARMCC::NE) {
18450 ARMCC::CondCodes Cond;
18451 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18452 if (N->getConstantOperandVal(2) == ARMCC::NE)
18453 Cond = ARMCC::getOppositeCondition(Cond);
18454 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18455 N->getOperand(1),
18456 DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18457 N->getOperand(3), C);
18458 }
18459 }
18460
18461 // Materialize a boolean comparison for integers so we can avoid branching.
18462 if (isNullConstant(FalseVal)) {
18463 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18464 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18465 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18466 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18467 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18468 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18469 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18470 DAG.getConstant(5, dl, MVT::i32));
18471 } else {
18472 // CMOV 0, 1, ==, (CMPZ x, y) ->
18473 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18474 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18475 //
18476 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18477 // x != y. In other words, a carry C == 1 when x == y, C == 0
18478 // otherwise.
18479 // The final UADDO_CARRY computes
18480 // x - y + (0 - (x - y)) + C == C
18481 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18482 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18483 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18484 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18485 // actually.
18486 SDValue Carry =
18487 DAG.getNode(ISD::SUB, dl, MVT::i32,
18488 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18489 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18490 }
18491 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18492 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18493 // This seems pointless but will allow us to combine it further below.
18494 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18495 SDValue Sub =
18496 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18497 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18498 Sub.getValue(1), SDValue());
18499 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18500 N->getOperand(3), CPSRGlue.getValue(1));
18501 FalseVal = Sub;
18502 }
18503 } else if (isNullConstant(TrueVal)) {
18504 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18505 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18506 // This seems pointless but will allow us to combine it further below
18507 // Note that we change == for != as this is the dual for the case above.
18508 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18509 SDValue Sub =
18510 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18511 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18512 Sub.getValue(1), SDValue());
18513 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18514 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18515 N->getOperand(3), CPSRGlue.getValue(1));
18516 FalseVal = Sub;
18517 }
18518 }
18519
18520 // On Thumb1, the DAG above may be further combined if z is a power of 2
18521 // (z == 2 ^ K).
18522 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18523 // t1 = (USUBO (SUB x, y), 1)
18524 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18525 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18526 //
18527 // This also handles the special case of comparing against zero; it's
18528 // essentially, the same pattern, except there's no SUBC:
18529 // CMOV x, z, !=, (CMPZ x, 0) ->
18530 // t1 = (USUBO x, 1)
18531 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18532 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18533 const APInt *TrueConst;
18534 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18535 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18536 FalseVal.getOperand(1) == RHS) ||
18537 (FalseVal == LHS && isNullConstant(RHS))) &&
18538 (TrueConst = isPowerOf2Constant(TrueVal))) {
18539 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18540 unsigned ShiftAmount = TrueConst->logBase2();
18541 if (ShiftAmount)
18542 TrueVal = DAG.getConstant(1, dl, VT);
18543 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18544 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18545 Subc.getValue(1));
18546
18547 if (ShiftAmount)
18548 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18549 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18550 }
18551
18552 if (Res.getNode()) {
18553 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18554 // Capture demanded bits information that would be otherwise lost.
18555 if (Known.Zero == 0xfffffffe)
18556 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18557 DAG.getValueType(MVT::i1));
18558 else if (Known.Zero == 0xffffff00)
18559 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18560 DAG.getValueType(MVT::i8));
18561 else if (Known.Zero == 0xffff0000)
18562 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18563 DAG.getValueType(MVT::i16));
18564 }
18565
18566 return Res;
18567}
18568
18569 static SDValue PerformBITCASTCombine(SDNode *N,
18570 TargetLowering::DAGCombinerInfo &DCI,
18571 const ARMSubtarget *ST) {
18572 SelectionDAG &DAG = DCI.DAG;
18573 SDValue Src = N->getOperand(0);
18574 EVT DstVT = N->getValueType(0);
18575
18576 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18577 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18578 EVT SrcVT = Src.getValueType();
18579 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18580 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18581 }
18582
18583 // We may have a bitcast of something that has already had this bitcast
18584 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18585 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18586 Src = Src.getOperand(0);
18587
18588 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18589 // would be generated is at least the width of the element type.
18590 EVT SrcVT = Src.getValueType();
18591 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18592 Src.getOpcode() == ARMISD::VMVNIMM ||
18593 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18594 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18595 DAG.getDataLayout().isBigEndian())
18596 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18597
18598 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18599 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18600 return R;
18601
18602 return SDValue();
18603}
18604
18605// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18606// node into stack operations after legalizeOps.
18607 static SDValue PerformMVETruncCombine(SDNode *N,
18608 TargetLowering::DAGCombinerInfo &DCI) {
18609 SelectionDAG &DAG = DCI.DAG;
18610 EVT VT = N->getValueType(0);
18611 SDLoc DL(N);
18612
18613 // MVETrunc(Undef, Undef) -> Undef
18614 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18615 return DAG.getUNDEF(VT);
18616
18617 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18618 if (N->getNumOperands() == 2 &&
18619 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18620 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18621 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18622 N->getOperand(0).getOperand(1),
18623 N->getOperand(1).getOperand(0),
18624 N->getOperand(1).getOperand(1));
18625
18626 // MVETrunc(shuffle, shuffle) -> VMOVN
18627 if (N->getNumOperands() == 2 &&
18628 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18629 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18630 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18631 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18632
18633 if (S0->getOperand(0) == S1->getOperand(0) &&
18634 S0->getOperand(1) == S1->getOperand(1)) {
18635 // Construct complete shuffle mask
18636 SmallVector<int, 8> Mask(S0->getMask());
18637 Mask.append(S1->getMask().begin(), S1->getMask().end());
18638
18639 if (isVMOVNTruncMask(Mask, VT, false))
18640 return DAG.getNode(
18641 ARMISD::VMOVN, DL, VT,
18642 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18643 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18644 DAG.getConstant(1, DL, MVT::i32));
18645 if (isVMOVNTruncMask(Mask, VT, true))
18646 return DAG.getNode(
18647 ARMISD::VMOVN, DL, VT,
18648 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18649 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18650 DAG.getConstant(1, DL, MVT::i32));
18651 }
18652 }
18653
18654 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18655 // truncate to a buildvector to allow the generic optimisations to kick in.
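// The loop below extracts every source lane as an i32 so that a plain
// BUILD_VECTOR of the narrower result type can be formed instead.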
18656 if (all_of(N->ops(), [](SDValue Op) {
18657 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18658 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18659 (Op.getOpcode() == ISD::BITCAST &&
18660 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18661 })) {
18662 SmallVector<SDValue, 8> Extracts;
18663 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18664 SDValue O = N->getOperand(Op);
18665 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18666 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18667 DAG.getConstant(i, DL, MVT::i32));
18668 Extracts.push_back(Ext);
18669 }
18670 }
18671 return DAG.getBuildVector(VT, DL, Extracts);
18672 }
18673
18674 // If we are late in the legalization process and nothing has optimised
18675 // the trunc to anything better, lower it to a stack store and reload,
18676 // performing the truncation whilst keeping the lanes in the correct order:
18677 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
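// Truncating stores only write the low part of each lane, so storing the two
// (or four) inputs contiguously and reloading the whole slot as one vector
// performs the truncation while keeping the lanes in source order.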
18678 if (!DCI.isAfterLegalizeDAG())
18679 return SDValue();
18680
18681 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18682 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18683 int NumIns = N->getNumOperands();
18684 assert((NumIns == 2 || NumIns == 4) &&
18685 "Expected 2 or 4 inputs to an MVETrunc");
18686 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18687 if (N->getNumOperands() == 4)
18688 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18689
18690 SmallVector<SDValue> Chains;
18691 for (int I = 0; I < NumIns; I++) {
18692 SDValue Ptr = DAG.getNode(
18693 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18694 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18695 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18696 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18697 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18698 Ptr, MPI, StoreVT, Align(4));
18699 Chains.push_back(Ch);
18700 }
18701
18702 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18703 MachinePointerInfo MPI =
18704 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18705 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18706}
18707
18708// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18709 static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18710 SelectionDAG &DAG) {
18711 SDValue N0 = N->getOperand(0);
18712 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18713 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18714 return SDValue();
18715
18716 EVT FromVT = LD->getMemoryVT();
18717 EVT ToVT = N->getValueType(0);
18718 if (!ToVT.isVector())
18719 return SDValue();
18720 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18721 EVT ToEltVT = ToVT.getVectorElementType();
18722 EVT FromEltVT = FromVT.getVectorElementType();
18723
18724 unsigned NumElements = 0;
18725 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18726 NumElements = 4;
18727 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18728 NumElements = 8;
18729 assert(NumElements != 0);
18730
18731 ISD::LoadExtType NewExtType =
18732 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18733 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18734 LD->getExtensionType() != ISD::EXTLOAD &&
18735 LD->getExtensionType() != NewExtType)
18736 return SDValue();
18737
18738 LLVMContext &C = *DAG.getContext();
18739 SDLoc DL(LD);
18740 // Details about the old load
18741 SDValue Ch = LD->getChain();
18742 SDValue BasePtr = LD->getBasePtr();
18743 Align Alignment = LD->getOriginalAlign();
18744 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18745 AAMDNodes AAInfo = LD->getAAInfo();
18746
18747 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18748 EVT NewFromVT = EVT::getVectorVT(
18749 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18750 EVT NewToVT = EVT::getVectorVT(
18751 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18752
18753 SmallVector<SDValue, 4> Loads;
18754 SmallVector<SDValue, 4> Chains;
18755 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18756 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18757 SDValue NewPtr =
18758 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18759
18760 SDValue NewLoad =
18761 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18762 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18763 Alignment, MMOFlags, AAInfo);
18764 Loads.push_back(NewLoad);
18765 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18766 }
18767
18768 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18769 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18770 return DAG.getMergeValues(Loads, DL);
18771}
18772
18773 // Perform combines for MVEEXT. If it has not been optimized to anything better
18774 // before lowering, it gets converted to a stack store and extloads performing the
18775 // extend whilst still keeping the same lane ordering.
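// For example, an MVESEXT of a v16i8 operand produces two v8i16 results
// holding the sign-extended bottom and top halves of the input.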
18776 static SDValue PerformMVEExtCombine(SDNode *N,
18777 TargetLowering::DAGCombinerInfo &DCI) {
18778 SelectionDAG &DAG = DCI.DAG;
18779 EVT VT = N->getValueType(0);
18780 SDLoc DL(N);
18781 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18782 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18783
18784 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18785 *DAG.getContext());
18786 auto Extend = [&](SDValue V) {
18787 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18788 return N->getOpcode() == ARMISD::MVESEXT
18789 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18790 DAG.getValueType(ExtVT))
18791 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18792 };
18793
18794 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18795 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18796 SDValue Ext = Extend(N->getOperand(0));
18797 return DAG.getMergeValues({Ext, Ext}, DL);
18798 }
18799
18800 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18801 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18802 ArrayRef<int> Mask = SVN->getMask();
18803 assert(Mask.size() == 2 * VT.getVectorNumElements());
18804 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18805 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18806 SDValue Op0 = SVN->getOperand(0);
18807 SDValue Op1 = SVN->getOperand(1);
18808
18809 auto CheckInregMask = [&](int Start, int Offset) {
18810 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18811 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18812 return false;
18813 return true;
18814 };
18815 SDValue V0 = SDValue(N, 0);
18816 SDValue V1 = SDValue(N, 1);
18817 if (CheckInregMask(0, 0))
18818 V0 = Extend(Op0);
18819 else if (CheckInregMask(0, 1))
18820 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18821 else if (CheckInregMask(0, Mask.size()))
18822 V0 = Extend(Op1);
18823 else if (CheckInregMask(0, Mask.size() + 1))
18824 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18825
18826 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18827 V1 = Extend(Op1);
18828 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18829 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18830 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18831 V1 = Extend(Op0);
18832 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18833 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18834
18835 if (V0.getNode() != N || V1.getNode() != N)
18836 return DAG.getMergeValues({V0, V1}, DL);
18837 }
18838
18839 // MVEEXT(load) -> extload, extload
18840 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18841 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DCI.DAG))
18842 return L;
18843
18844 if (!DCI.isAfterLegalizeDAG())
18845 return SDValue();
18846
18847 // Lower to a stack store and reload:
18848 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
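// The whole input vector is stored once, and each half (or quarter) of the
// slot is then reloaded with a sign- or zero-extending load of the narrower
// type.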
18849 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18850 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18851 int NumOuts = N->getNumValues();
18852 assert((NumOuts == 2 || NumOuts == 4) &&
18853 "Expected 2 or 4 outputs to an MVEEXT");
18854 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18855 *DAG.getContext());
18856 if (N->getNumOperands() == 4)
18857 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18858
18859 MachinePointerInfo MPI =
18860 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18861 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18862 StackPtr, MPI, Align(4));
18863
18864 SmallVector<SDValue> Loads;
18865 for (int I = 0; I < NumOuts; I++) {
18866 SDValue Ptr = DAG.getNode(
18867 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18868 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18869 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18870 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18871 SDValue Load = DAG.getExtLoad(
18872 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18873 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18874 Loads.push_back(Load);
18875 }
18876
18877 return DAG.getMergeValues(Loads, DL);
18878}
18879
18880 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18881 DAGCombinerInfo &DCI) const {
18882 switch (N->getOpcode()) {
18883 default: break;
18884 case ISD::SELECT_CC:
18885 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18886 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18887 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18888 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18889 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18890 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18891 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18892 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18893 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18894 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18895 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18896 case ISD::BRCOND:
18897 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18898 case ARMISD::ADDC:
18899 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18900 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18901 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18902 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18903 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18904 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18905 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18906 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18907 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18908 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18909 case ISD::EXTRACT_VECTOR_ELT:
18910 return PerformExtractEltCombine(N, DCI, Subtarget);
18911 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
18912 case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
18913 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
18914 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18915 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18916 case ISD::FP_TO_SINT:
18917 case ISD::FP_TO_UINT:
18918 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18919 case ISD::FADD:
18920 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18921 case ISD::FMUL:
18922 return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);
18923 case ISD::INTRINSIC_WO_CHAIN:
18924 return PerformIntrinsicCombine(N, DCI);
18925 case ISD::SHL:
18926 case ISD::SRA:
18927 case ISD::SRL:
18928 return PerformShiftCombine(N, DCI, Subtarget);
18929 case ISD::SIGN_EXTEND:
18930 case ISD::ZERO_EXTEND:
18931 case ISD::ANY_EXTEND:
18932 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18933 case ISD::FP_EXTEND:
18934 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18935 case ISD::SMIN:
18936 case ISD::UMIN:
18937 case ISD::SMAX:
18938 case ISD::UMAX:
18939 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18940 case ARMISD::CMOV:
18941 return PerformCMOVCombine(N, DCI.DAG);
18942 case ARMISD::BRCOND:
18943 return PerformBRCONDCombine(N, DCI.DAG);
18944 case ARMISD::CMPZ:
18945 return PerformCMPZCombine(N, DCI.DAG);
18946 case ARMISD::CSINC:
18947 case ARMISD::CSINV:
18948 case ARMISD::CSNEG:
18949 return PerformCSETCombine(N, DCI.DAG);
18950 case ISD::LOAD:
18951 return PerformLOADCombine(N, DCI, Subtarget);
18952 case ARMISD::VLD1DUP:
18953 case ARMISD::VLD2DUP:
18954 case ARMISD::VLD3DUP:
18955 case ARMISD::VLD4DUP:
18956 return PerformVLDCombine(N, DCI);
18957 case ARMISD::BUILD_VECTOR:
18958 return PerformARMBUILD_VECTORCombine(N, DCI);
18959 case ISD::BITCAST:
18960 return PerformBITCASTCombine(N, DCI, Subtarget);
18961 case ARMISD::PREDICATE_CAST:
18962 return PerformPREDICATE_CASTCombine(N, DCI);
18963 case ARMISD::VECTOR_REG_CAST:
18964 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18965 case ARMISD::MVETRUNC:
18966 return PerformMVETruncCombine(N, DCI);
18967 case ARMISD::MVESEXT:
18968 case ARMISD::MVEZEXT:
18969 return PerformMVEExtCombine(N, DCI);
18970 case ARMISD::VCMP:
18971 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18972 case ISD::VECREDUCE_ADD:
18973 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18974 case ARMISD::VADDVs:
18975 case ARMISD::VADDVu:
18976 case ARMISD::VADDLVs:
18977 case ARMISD::VADDLVu:
18978 case ARMISD::VADDLVAs:
18979 case ARMISD::VADDLVAu:
18980 case ARMISD::VMLAVs:
18981 case ARMISD::VMLAVu:
18982 case ARMISD::VMLALVs:
18983 case ARMISD::VMLALVu:
18984 case ARMISD::VMLALVAs:
18985 case ARMISD::VMLALVAu:
18986 return PerformReduceShuffleCombine(N, DCI.DAG);
18987 case ARMISD::VMOVN:
18988 return PerformVMOVNCombine(N, DCI);
18989 case ARMISD::VQMOVNs:
18990 case ARMISD::VQMOVNu:
18991 return PerformVQMOVNCombine(N, DCI);
18992 case ARMISD::VQDMULH:
18993 return PerformVQDMULHCombine(N, DCI);
18994 case ARMISD::ASRL:
18995 case ARMISD::LSRL:
18996 case ARMISD::LSLL:
18997 return PerformLongShiftCombine(N, DCI.DAG);
18998 case ARMISD::SMULWB: {
18999 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19000 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19001 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19002 return SDValue();
19003 break;
19004 }
19005 case ARMISD::SMULWT: {
19006 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19007 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19008 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19009 return SDValue();
19010 break;
19011 }
19012 case ARMISD::SMLALBB:
19013 case ARMISD::QADD16b:
19014 case ARMISD::QSUB16b:
19015 case ARMISD::UQADD16b:
19016 case ARMISD::UQSUB16b: {
19017 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19018 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19019 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19020 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19021 return SDValue();
19022 break;
19023 }
19024 case ARMISD::SMLALBT: {
19025 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19026 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19027 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19028 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19029 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19030 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19031 return SDValue();
19032 break;
19033 }
19034 case ARMISD::SMLALTB: {
19035 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19036 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19037 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19038 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19039 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19040 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19041 return SDValue();
19042 break;
19043 }
19044 case ARMISD::SMLALTT: {
19045 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19046 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19047 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19048 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19049 return SDValue();
19050 break;
19051 }
19052 case ARMISD::QADD8b:
19053 case ARMISD::QSUB8b:
19054 case ARMISD::UQADD8b:
19055 case ARMISD::UQSUB8b: {
19056 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19057 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19058 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19059 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19060 return SDValue();
19061 break;
19062 }
19063 case ISD::INTRINSIC_VOID:
19064 case ISD::INTRINSIC_W_CHAIN:
19065 switch (N->getConstantOperandVal(1)) {
19066 case Intrinsic::arm_neon_vld1:
19067 case Intrinsic::arm_neon_vld1x2:
19068 case Intrinsic::arm_neon_vld1x3:
19069 case Intrinsic::arm_neon_vld1x4:
19070 case Intrinsic::arm_neon_vld2:
19071 case Intrinsic::arm_neon_vld3:
19072 case Intrinsic::arm_neon_vld4:
19073 case Intrinsic::arm_neon_vld2lane:
19074 case Intrinsic::arm_neon_vld3lane:
19075 case Intrinsic::arm_neon_vld4lane:
19076 case Intrinsic::arm_neon_vld2dup:
19077 case Intrinsic::arm_neon_vld3dup:
19078 case Intrinsic::arm_neon_vld4dup:
19079 case Intrinsic::arm_neon_vst1:
19080 case Intrinsic::arm_neon_vst1x2:
19081 case Intrinsic::arm_neon_vst1x3:
19082 case Intrinsic::arm_neon_vst1x4:
19083 case Intrinsic::arm_neon_vst2:
19084 case Intrinsic::arm_neon_vst3:
19085 case Intrinsic::arm_neon_vst4:
19086 case Intrinsic::arm_neon_vst2lane:
19087 case Intrinsic::arm_neon_vst3lane:
19088 case Intrinsic::arm_neon_vst4lane:
19089 return PerformVLDCombine(N, DCI);
19090 case Intrinsic::arm_mve_vld2q:
19091 case Intrinsic::arm_mve_vld4q:
19092 case Intrinsic::arm_mve_vst2q:
19093 case Intrinsic::arm_mve_vst4q:
19094 return PerformMVEVLDCombine(N, DCI);
19095 default: break;
19096 }
19097 break;
19098 }
19099 return SDValue();
19100}
19101
19102 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19103 EVT VT) const {
19104 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19105}
19106
19107 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19108 Align Alignment,
19109 MachineMemOperand::Flags,
19110 unsigned *Fast) const {
19111 // Depends what it gets converted into if the type is weird.
19112 if (!VT.isSimple())
19113 return false;
19114
19115 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19116 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19117 auto Ty = VT.getSimpleVT().SimpleTy;
19118
19119 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19120 // Unaligned access can use (for example) LDRB, LDRH, LDR
19121 if (AllowsUnaligned) {
19122 if (Fast)
19123 *Fast = Subtarget->hasV7Ops();
19124 return true;
19125 }
19126 }
19127
19128 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19129 // For any little-endian targets with neon, we can support unaligned ld/st
19130 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19131 // A big-endian target may also explicitly support unaligned accesses
19132 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19133 if (Fast)
19134 *Fast = 1;
19135 return true;
19136 }
19137 }
19138
19139 if (!Subtarget->hasMVEIntegerOps())
19140 return false;
19141
19142 // These are for predicates
19143 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19144 Ty == MVT::v2i1)) {
19145 if (Fast)
19146 *Fast = 1;
19147 return true;
19148 }
19149
19150 // These are for truncated stores/narrowing loads. They are fine so long as
19151 // the alignment is at least the size of the item being loaded
19152 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19153 Alignment >= VT.getScalarSizeInBits() / 8) {
19154 if (Fast)
19155 *Fast = true;
19156 return true;
19157 }
19158
19159 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19160 // VSTRW.U32 all store the vector register in exactly the same format, and
19161 // differ only in the range of their immediate offset field and the required
19162 // alignment. So there is always a store that can be used, regardless of
19163 // actual type.
19164 //
19165 // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
19166 // VREV64.8) pair and get the same effect. This will likely be better than
19167 // aligning the vector through the stack.
19168 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19169 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19170 Ty == MVT::v2f64) {
19171 if (Fast)
19172 *Fast = 1;
19173 return true;
19174 }
19175
19176 return false;
19177}
19178
19179
19180 EVT ARMTargetLowering::getOptimalMemOpType(
19181 const MemOp &Op, const AttributeList &FuncAttributes) const {
19182 // See if we can use NEON instructions for this...
19183 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19184 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19185 unsigned Fast;
19186 if (Op.size() >= 16 &&
19187 (Op.isAligned(Align(16)) ||
19188 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19189 MachineMemOperand::MONone, &Fast) &&
19190 Fast))) {
19191 return MVT::v2f64;
19192 } else if (Op.size() >= 8 &&
19193 (Op.isAligned(Align(8)) ||
19194 (allowsMisalignedMemoryAccesses(
19195 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19196 Fast))) {
19197 return MVT::f64;
19198 }
19199 }
19200
19201 // Let the target-independent logic figure it out.
19202 return MVT::Other;
19203}
19204
19205// 64-bit integers are split into their high and low parts and held in two
19206// different registers, so the trunc is free since the low register can just
19207// be used.
19208bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19209 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19210 return false;
19211 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19212 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19213 return (SrcBits == 64 && DestBits == 32);
19214}
19215
19216 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19217 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19218 !DstVT.isInteger())
19219 return false;
19220 unsigned SrcBits = SrcVT.getSizeInBits();
19221 unsigned DestBits = DstVT.getSizeInBits();
19222 return (SrcBits == 64 && DestBits == 32);
19223}
19224
19225 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19226 if (Val.getOpcode() != ISD::LOAD)
19227 return false;
19228
19229 EVT VT1 = Val.getValueType();
19230 if (!VT1.isSimple() || !VT1.isInteger() ||
19231 !VT2.isSimple() || !VT2.isInteger())
19232 return false;
19233
19234 switch (VT1.getSimpleVT().SimpleTy) {
19235 default: break;
19236 case MVT::i1:
19237 case MVT::i8:
19238 case MVT::i16:
19239 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19240 return true;
19241 }
19242
19243 return false;
19244}
19245
19246 bool ARMTargetLowering::isFNegFree(EVT VT) const {
19247 if (!VT.isSimple())
19248 return false;
19249
19250 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19251 // negate values directly (fneg is free). So, we don't want to let the DAG
19252 // combiner rewrite fneg into xors and some other instructions. For f16 and
19253 // FullFP16 argument passing, some bitcast nodes may be introduced,
19254 // triggering this DAG combine rewrite, so we avoid that here.
19255 switch (VT.getSimpleVT().SimpleTy) {
19256 default: break;
19257 case MVT::f16:
19258 return Subtarget->hasFullFP16();
19259 }
19260
19261 return false;
19262}
19263
19264/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
19265/// of the vector elements.
19266static bool areExtractExts(Value *Ext1, Value *Ext2) {
19267 auto areExtDoubled = [](Instruction *Ext) {
19268 return Ext->getType()->getScalarSizeInBits() ==
19269 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19270 };
19271
19272 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
19273 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
19274 !areExtDoubled(cast<Instruction>(Ext1)) ||
19275 !areExtDoubled(cast<Instruction>(Ext2)))
19276 return false;
19277
19278 return true;
19279}
19280
19281/// Check if sinking \p I's operands to I's basic block is profitable, because
19282/// the operands can be folded into a target instruction, e.g.
19283/// sext/zext can be folded into vsubl.
19284 bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
19285 SmallVectorImpl<Use *> &Ops) const {
19286 if (!I->getType()->isVectorTy())
19287 return false;
19288
19289 if (Subtarget->hasNEON()) {
19290 switch (I->getOpcode()) {
19291 case Instruction::Sub:
19292 case Instruction::Add: {
19293 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19294 return false;
19295 Ops.push_back(&I->getOperandUse(0));
19296 Ops.push_back(&I->getOperandUse(1));
19297 return true;
19298 }
19299 default:
19300 return false;
19301 }
19302 }
19303
19304 if (!Subtarget->hasMVEIntegerOps())
19305 return false;
19306
19307 auto IsFMSMul = [&](Instruction *I) {
19308 if (!I->hasOneUse())
19309 return false;
19310 auto *Sub = cast<Instruction>(*I->users().begin());
19311 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19312 };
19313 auto IsFMS = [&](Instruction *I) {
19314 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19315 match(I->getOperand(1), m_FNeg(m_Value())))
19316 return true;
19317 return false;
19318 };
19319
19320 auto IsSinker = [&](Instruction *I, int Operand) {
19321 switch (I->getOpcode()) {
19322 case Instruction::Add:
19323 case Instruction::Mul:
19324 case Instruction::FAdd:
19325 case Instruction::ICmp:
19326 case Instruction::FCmp:
19327 return true;
19328 case Instruction::FMul:
19329 return !IsFMSMul(I);
19330 case Instruction::Sub:
19331 case Instruction::FSub:
19332 case Instruction::Shl:
19333 case Instruction::LShr:
19334 case Instruction::AShr:
19335 return Operand == 1;
19336 case Instruction::Call:
19337 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19338 switch (II->getIntrinsicID()) {
19339 case Intrinsic::fma:
19340 return !IsFMS(I);
19341 case Intrinsic::sadd_sat:
19342 case Intrinsic::uadd_sat:
19343 case Intrinsic::arm_mve_add_predicated:
19344 case Intrinsic::arm_mve_mul_predicated:
19345 case Intrinsic::arm_mve_qadd_predicated:
19346 case Intrinsic::arm_mve_vhadd:
19347 case Intrinsic::arm_mve_hadd_predicated:
19348 case Intrinsic::arm_mve_vqdmull:
19349 case Intrinsic::arm_mve_vqdmull_predicated:
19350 case Intrinsic::arm_mve_vqdmulh:
19351 case Intrinsic::arm_mve_qdmulh_predicated:
19352 case Intrinsic::arm_mve_vqrdmulh:
19353 case Intrinsic::arm_mve_qrdmulh_predicated:
19354 case Intrinsic::arm_mve_fma_predicated:
19355 return true;
19356 case Intrinsic::ssub_sat:
19357 case Intrinsic::usub_sat:
19358 case Intrinsic::arm_mve_sub_predicated:
19359 case Intrinsic::arm_mve_qsub_predicated:
19360 case Intrinsic::arm_mve_hsub_predicated:
19361 case Intrinsic::arm_mve_vhsub:
19362 return Operand == 1;
19363 default:
19364 return false;
19365 }
19366 }
19367 return false;
19368 default:
19369 return false;
19370 }
19371 };
19372
19373 for (auto OpIdx : enumerate(I->operands())) {
19374 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19375 // Make sure we are not already sinking this operand
19376 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19377 continue;
19378
19379 Instruction *Shuffle = Op;
19380 if (Shuffle->getOpcode() == Instruction::BitCast)
19381 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19382 // We are looking for a splat that can be sunk.
19383 if (!Shuffle ||
19384 !match(Shuffle, m_Shuffle(
19385 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
19386 m_Undef(), m_ZeroMask())))
19387 continue;
19388 if (!IsSinker(I, OpIdx.index()))
19389 continue;
19390
19391 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
19392 // and vector registers
19393 for (Use &U : Op->uses()) {
19394 Instruction *Insn = cast<Instruction>(U.getUser());
19395 if (!IsSinker(Insn, U.getOperandNo()))
19396 return false;
19397 }
19398
19399 Ops.push_back(&Shuffle->getOperandUse(0));
19400 if (Shuffle != Op)
19401 Ops.push_back(&Op->getOperandUse(0));
19402 Ops.push_back(&OpIdx.value());
19403 }
19404 return true;
19405}
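// Illustrative sketch (hypothetical IR, not part of this file): sinking a splat
// next to its user lets MVE fold the scalar straight into the instruction,
// e.g. a single "vadd.i32 q0, q1, r0" instead of materialising the splat:
//   %ins   = insertelement <4 x i32> undef, i32 %s, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   ...
//   %sum   = add <4 x i32> %v, %splat   ; the splat is sunk to sit beside this add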
19406
19407Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19408 if (!Subtarget->hasMVEIntegerOps())
19409 return nullptr;
19410 Type *SVIType = SVI->getType();
19411 Type *ScalarType = SVIType->getScalarType();
19412
19413 if (ScalarType->isFloatTy())
19414 return Type::getInt32Ty(SVIType->getContext());
19415 if (ScalarType->isHalfTy())
19416 return Type::getInt16Ty(SVIType->getContext());
19417 return nullptr;
19418}
19419
19420bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19421 EVT VT = ExtVal.getValueType();
19422
19423 if (!isTypeLegal(VT))
19424 return false;
19425
19426 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19427 if (Ld->isExpandingLoad())
19428 return false;
19429 }
19430
19431 if (Subtarget->hasMVEIntegerOps())
19432 return true;
19433
19434 // Don't create a loadext if we can fold the extension into a wide/long
19435 // instruction.
19436 // If there's more than one user instruction, the loadext is desirable no
19437 // matter what. There can be two uses by the same instruction.
19438 if (ExtVal->use_empty() ||
19439 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19440 return true;
19441
19442 SDNode *U = *ExtVal->use_begin();
19443 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19444 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19445 return false;
19446
19447 return true;
19448}
19449
19450bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19451 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19452 return false;
19453
19454 if (!isTypeLegal(EVT::getEVT(Ty1)))
19455 return false;
19456
19457 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19458
19459 // Assuming the caller doesn't have a zeroext or signext return parameter,
19460 // truncation all the way down to i1 is valid.
19461 return true;
19462}
19463
19464/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19465/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19466/// expanded to FMAs when this method returns true, otherwise fmuladd is
19467/// expanded to fmul + fadd.
19468///
19469/// ARM supports both fused and unfused multiply-add operations; we already
19470/// lower a pair of fmul and fadd to the latter so it's not clear that there
19471/// would be a gain or that the gain would be worthwhile enough to risk
19472/// correctness bugs.
19473///
19474/// For MVE, we set this to true as it helps simplify the need for some
19475/// patterns (and we don't have the non-fused floating point instruction).
19476bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19477 EVT VT) const {
19478 if (!VT.isSimple())
19479 return false;
19480
19481 switch (VT.getSimpleVT().SimpleTy) {
19482 case MVT::v4f32:
19483 case MVT::v8f16:
19484 return Subtarget->hasMVEFloatOps();
19485 case MVT::f16:
19486 return Subtarget->useFPVFMx16();
19487 case MVT::f32:
19488 return Subtarget->useFPVFMx();
19489 case MVT::f64:
19490 return Subtarget->useFPVFMx64();
19491 default:
19492 break;
19493 }
19494
19495 return false;
19496}
19497
19498static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19499 if (V < 0)
19500 return false;
19501
19502 unsigned Scale = 1;
19503 switch (VT.getSimpleVT().SimpleTy) {
19504 case MVT::i1:
19505 case MVT::i8:
19506 // Scale == 1;
19507 break;
19508 case MVT::i16:
19509 // Scale == 2;
19510 Scale = 2;
19511 break;
19512 default:
19513 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19514 // Scale == 4;
19515 Scale = 4;
19516 break;
19517 }
19518
19519 if ((V & (Scale - 1)) != 0)
19520 return false;
19521 return isUInt<5>(V / Scale);
19522}
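// Worked examples (editorial, derived from the checks above): Thumb1 allows a
// 5-bit unsigned offset scaled by the access size, so roughly
//   i8  accesses: offsets 0..31
//   i16 accesses: offsets 0..62  in steps of 2
//   i32 accesses: offsets 0..124 in steps of 4
// e.g. V == 68 with VT == MVT::i32 passes (68/4 == 17 fits in 5 bits), while
// V == 66 fails the alignment check (66 & 3 != 0).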
19523
19524static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19525 const ARMSubtarget *Subtarget) {
19526 if (!VT.isInteger() && !VT.isFloatingPoint())
19527 return false;
19528 if (VT.isVector() && Subtarget->hasNEON())
19529 return false;
19530 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19531 !Subtarget->hasMVEFloatOps())
19532 return false;
19533
19534 bool IsNeg = false;
19535 if (V < 0) {
19536 IsNeg = true;
19537 V = -V;
19538 }
19539
19540 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19541
19542 // MVE: size * imm7
19543 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19544 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19545 case MVT::i32:
19546 case MVT::f32:
19547 return isShiftedUInt<7,2>(V);
19548 case MVT::i16:
19549 case MVT::f16:
19550 return isShiftedUInt<7,1>(V);
19551 case MVT::i8:
19552 return isUInt<7>(V);
19553 default:
19554 return false;
19555 }
19556 }
19557
19558 // half VLDR: 2 * imm8
19559 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19560 return isShiftedUInt<8, 1>(V);
19561 // VLDR and LDRD: 4 * imm8
19562 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19563 return isShiftedUInt<8, 2>(V);
19564
19565 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19566 // + imm12 or - imm8
19567 if (IsNeg)
19568 return isUInt<8>(V);
19569 return isUInt<12>(V);
19570 }
19571
19572 return false;
19573}
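// Worked examples (editorial): for an MVE v4i32 access the offset must be a
// multiple of 4 whose scaled value fits in 7 bits (isShiftedUInt<7,2> on the
// absolute value), i.e. -508..508 in steps of 4. So V == 256 is legal,
// V == 514 is rejected (not a multiple of 4), and V == 512 is rejected because
// 512/4 == 128 does not fit in 7 bits.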
19574
19575/// isLegalAddressImmediate - Return true if the integer value can be used
19576/// as the offset of the target addressing mode for load / store of the
19577/// given type.
19578static bool isLegalAddressImmediate(int64_t V, EVT VT,
19579 const ARMSubtarget *Subtarget) {
19580 if (V == 0)
19581 return true;
19582
19583 if (!VT.isSimple())
19584 return false;
19585
19586 if (Subtarget->isThumb1Only())
19587 return isLegalT1AddressImmediate(V, VT);
19588 else if (Subtarget->isThumb2())
19589 return isLegalT2AddressImmediate(V, VT, Subtarget);
19590
19591 // ARM mode.
19592 if (V < 0)
19593 V = - V;
19594 switch (VT.getSimpleVT().SimpleTy) {
19595 default: return false;
19596 case MVT::i1:
19597 case MVT::i8:
19598 case MVT::i32:
19599 // +- imm12
19600 return isUInt<12>(V);
19601 case MVT::i16:
19602 // +- imm8
19603 return isUInt<8>(V);
19604 case MVT::f32:
19605 case MVT::f64:
19606 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19607 return false;
19608 return isShiftedUInt<8, 2>(V);
19609 }
19610}
19611
19612bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19613 EVT VT) const {
19614 int Scale = AM.Scale;
19615 if (Scale < 0)
19616 return false;
19617
19618 switch (VT.getSimpleVT().SimpleTy) {
19619 default: return false;
19620 case MVT::i1:
19621 case MVT::i8:
19622 case MVT::i16:
19623 case MVT::i32:
19624 if (Scale == 1)
19625 return true;
19626 // r + r << imm
19627 Scale = Scale & ~1;
19628 return Scale == 2 || Scale == 4 || Scale == 8;
19629 case MVT::i64:
19630 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19631 // version in Thumb mode.
19632 // r + r
19633 if (Scale == 1)
19634 return true;
19635 // r * 2 (this can be lowered to r + r).
19636 if (!AM.HasBaseReg && Scale == 2)
19637 return true;
19638 return false;
19639 case MVT::isVoid:
19640 // Note, we allow "void" uses (basically, uses that aren't loads or
19641 // stores), because arm allows folding a scale into many arithmetic
19642 // operations. This should be made more precise and revisited later.
19643
19644 // Allow r << imm, but the imm has to be a multiple of two.
19645 if (Scale & 1) return false;
19646 return isPowerOf2_32(Scale);
19647 }
19648}
19649
19650bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19651 EVT VT) const {
19652 const int Scale = AM.Scale;
19653
19654 // Negative scales are not supported in Thumb1.
19655 if (Scale < 0)
19656 return false;
19657
19658 // Thumb1 addressing modes do not support register scaling, except in the
19659 // following cases:
19660 // 1. Scale == 1 means no scaling.
19661 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19662 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19663}
19664
19665/// isLegalAddressingMode - Return true if the addressing mode represented
19666/// by AM is legal for this target, for a load/store of the specified type.
19667bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19668 const AddrMode &AM, Type *Ty,
19669 unsigned AS, Instruction *I) const {
19670 EVT VT = getValueType(DL, Ty, true);
19671 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19672 return false;
19673
19674 // Can never fold addr of global into load/store.
19675 if (AM.BaseGV)
19676 return false;
19677
19678 switch (AM.Scale) {
19679 case 0: // no scale reg, must be "r+i" or "r", or "i".
19680 break;
19681 default:
19682 // ARM doesn't support any R+R*scale+imm addr modes.
19683 if (AM.BaseOffs)
19684 return false;
19685
19686 if (!VT.isSimple())
19687 return false;
19688
19689 if (Subtarget->isThumb1Only())
19690 return isLegalT1ScaledAddressingMode(AM, VT);
19691
19692 if (Subtarget->isThumb2())
19693 return isLegalT2ScaledAddressingMode(AM, VT);
19694
19695 int Scale = AM.Scale;
19696 switch (VT.getSimpleVT().SimpleTy) {
19697 default: return false;
19698 case MVT::i1:
19699 case MVT::i8:
19700 case MVT::i32:
19701 if (Scale < 0) Scale = -Scale;
19702 if (Scale == 1)
19703 return true;
19704 // r + r << imm
19705 return isPowerOf2_32(Scale & ~1);
19706 case MVT::i16:
19707 case MVT::i64:
19708 // r +/- r
19709 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19710 return true;
19711 // r * 2 (this can be lowered to r + r).
19712 if (!AM.HasBaseReg && Scale == 2)
19713 return true;
19714 return false;
19715
19716 case MVT::isVoid:
19717 // Note, we allow "void" uses (basically, uses that aren't loads or
19718 // stores), because arm allows folding a scale into many arithmetic
19719 // operations. This should be made more precise and revisited later.
19720
19721 // Allow r << imm, but the imm has to be a multiple of two.
19722 if (Scale & 1) return false;
19723 return isPowerOf2_32(Scale);
19724 }
19725 }
19726 return true;
19727}
19728
19729/// isLegalICmpImmediate - Return true if the specified immediate is legal
19730/// icmp immediate, that is the target has icmp instructions which can compare
19731/// a register against the immediate without having to materialize the
19732/// immediate into a register.
19733bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19734 // Thumb2 and ARM modes can use cmn for negative immediates.
19735 if (!Subtarget->isThumb())
19736 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19737 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19738 if (Subtarget->isThumb2())
19739 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19740 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19741 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19742 return Imm >= 0 && Imm <= 255;
19743}
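// Illustrative sketch (editorial): in ARM/Thumb2 mode a compare against -1 is
// still "legal" because it can be flipped into CMN with a positive operand:
//   cmp r0, #-1   is not encodable, but
//   cmn r0, #1    computes r0 + 1 and therefore sets the same flags.
// Thumb1 has no CMN-with-immediate form and only 8-bit compare immediates,
// hence the 0..255 limit above.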
19744
19745/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19746/// *or sub* immediate, that is the target has add or sub instructions which can
19747/// add a register with the immediate without having to materialize the
19748/// immediate into a register.
19749bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19750 // Same encoding for add/sub, just flip the sign.
19751 int64_t AbsImm = std::abs(Imm);
19752 if (!Subtarget->isThumb())
19753 return ARM_AM::getSOImmVal(AbsImm) != -1;
19754 if (Subtarget->isThumb2())
19755 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19756 // Thumb1 only has 8-bit unsigned immediate.
19757 return AbsImm >= 0 && AbsImm <= 255;
19758}
19759
19760// Return false to prevent folding
19761// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19762// if the folding leads to worse code.
19763bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19764 SDValue ConstNode) const {
19765 // Let the DAGCombiner decide for vector types and large types.
19766 const EVT VT = AddNode.getValueType();
19767 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19768 return true;
19769
19770 // It is worse if c0 is legal add immediate, while c1*c0 is not
19771 // and has to be composed by at least two instructions.
19772 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19773 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19774 const int64_t C0 = C0Node->getSExtValue();
19775 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19776 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19777 return true;
19778 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19779 return false;
19780
19781 // Default to true and let the DAGCombiner decide.
19782 return true;
19783}
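// Worked example (editorial): for (x + 1) * 0x101011, c0 == 1 is a legal add
// immediate but c0*c1 == 0x101011 is not, and it takes more than one
// instruction to materialise, so the hook returns false and the add stays
// outside the multiply rather than folding the constant product in.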
19784
19785static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19786 bool isSEXTLoad, SDValue &Base,
19787 SDValue &Offset, bool &isInc,
19788 SelectionDAG &DAG) {
19789 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19790 return false;
19791
19792 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19793 // AddressingMode 3
19794 Base = Ptr->getOperand(0);
19795 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19796 int RHSC = (int)RHS->getZExtValue();
19797 if (RHSC < 0 && RHSC > -256) {
19798 assert(Ptr->getOpcode() == ISD::ADD);
19799 isInc = false;
19800 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19801 return true;
19802 }
19803 }
19804 isInc = (Ptr->getOpcode() == ISD::ADD);
19805 Offset = Ptr->getOperand(1);
19806 return true;
19807 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19808 // AddressingMode 2
19809 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19810 int RHSC = (int)RHS->getZExtValue();
19811 if (RHSC < 0 && RHSC > -0x1000) {
19812 assert(Ptr->getOpcode() == ISD::ADD);
19813 isInc = false;
19814 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19815 Base = Ptr->getOperand(0);
19816 return true;
19817 }
19818 }
19819
19820 if (Ptr->getOpcode() == ISD::ADD) {
19821 isInc = true;
19822 ARM_AM::ShiftOpc ShOpcVal=
19823 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19824 if (ShOpcVal != ARM_AM::no_shift) {
19825 Base = Ptr->getOperand(1);
19826 Offset = Ptr->getOperand(0);
19827 } else {
19828 Base = Ptr->getOperand(0);
19829 Offset = Ptr->getOperand(1);
19830 }
19831 return true;
19832 }
19833
19834 isInc = (Ptr->getOpcode() == ISD::ADD);
19835 Base = Ptr->getOperand(0);
19836 Offset = Ptr->getOperand(1);
19837 return true;
19838 }
19839
19840 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19841 return false;
19842}
19843
19844static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19845 bool isSEXTLoad, SDValue &Base,
19846 SDValue &Offset, bool &isInc,
19847 SelectionDAG &DAG) {
19848 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19849 return false;
19850
19851 Base = Ptr->getOperand(0);
19852 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19853 int RHSC = (int)RHS->getZExtValue();
19854 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19855 assert(Ptr->getOpcode() == ISD::ADD);
19856 isInc = false;
19857 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19858 return true;
19859 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19860 isInc = Ptr->getOpcode() == ISD::ADD;
19861 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19862 return true;
19863 }
19864 }
19865
19866 return false;
19867}
19868
19869static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19870 bool isSEXTLoad, bool IsMasked, bool isLE,
19871 SDValue &Base, SDValue &Offset,
19872 bool &isInc, SelectionDAG &DAG) {
19873 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19874 return false;
19875 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19876 return false;
19877
19878 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19879 // as opposed to a vldrw.32). This can allow extra addressing modes or
19880 // alignments for what is otherwise an equivalent instruction.
19881 bool CanChangeType = isLE && !IsMasked;
19882
19883 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19884 int RHSC = (int)RHS->getZExtValue();
19885
19886 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19887 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19888 assert(Ptr->getOpcode() == ISD::ADD);
19889 isInc = false;
19890 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19891 return true;
19892 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19893 isInc = Ptr->getOpcode() == ISD::ADD;
19894 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19895 return true;
19896 }
19897 return false;
19898 };
19899
19900 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19901 // (in BE/masked) type.
19902 Base = Ptr->getOperand(0);
19903 if (VT == MVT::v4i16) {
19904 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19905 return true;
19906 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19907 if (IsInRange(RHSC, 0x80, 1))
19908 return true;
19909 } else if (Alignment >= 4 &&
19910 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19911 IsInRange(RHSC, 0x80, 4))
19912 return true;
19913 else if (Alignment >= 2 &&
19914 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19915 IsInRange(RHSC, 0x80, 2))
19916 return true;
19917 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19918 return true;
19919 return false;
19920}
19921
19922/// getPreIndexedAddressParts - returns true by value, base pointer and
19923/// offset pointer and addressing mode by reference if the node's address
19924/// can be legally represented as pre-indexed load / store address.
19925bool
19926ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19927 SDValue &Offset,
19928 ISD::MemIndexedMode &AM,
19929 SelectionDAG &DAG) const {
19930 if (Subtarget->isThumb1Only())
19931 return false;
19932
19933 EVT VT;
19934 SDValue Ptr;
19935 Align Alignment;
19936 bool isSEXTLoad = false;
19937 bool IsMasked = false;
19938 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19939 Ptr = LD->getBasePtr();
19940 VT = LD->getMemoryVT();
19941 Alignment = LD->getAlign();
19942 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19943 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19944 Ptr = ST->getBasePtr();
19945 VT = ST->getMemoryVT();
19946 Alignment = ST->getAlign();
19947 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19948 Ptr = LD->getBasePtr();
19949 VT = LD->getMemoryVT();
19950 Alignment = LD->getAlign();
19951 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19952 IsMasked = true;
19953 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19954 Ptr = ST->getBasePtr();
19955 VT = ST->getMemoryVT();
19956 Alignment = ST->getAlign();
19957 IsMasked = true;
19958 } else
19959 return false;
19960
19961 bool isInc;
19962 bool isLegal = false;
19963 if (VT.isVector())
19964 isLegal = Subtarget->hasMVEIntegerOps() &&
19965 getMVEIndexedAddressParts(
19966 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19967 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19968 else {
19969 if (Subtarget->isThumb2())
19970 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19971 Offset, isInc, DAG);
19972 else
19973 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19974 Offset, isInc, DAG);
19975 }
19976 if (!isLegal)
19977 return false;
19978
19979 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19980 return true;
19981}
19982
19983/// getPostIndexedAddressParts - returns true by value, base pointer and
19984/// offset pointer and addressing mode by reference if this node can be
19985/// combined with a load / store to form a post-indexed load / store.
19986bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDValue Op,
19987 SDValue &Base,
19988 SDValue &Offset,
19989 ISD::MemIndexedMode &AM,
19990 SelectionDAG &DAG) const {
19991 EVT VT;
19992 SDValue Ptr;
19993 Align Alignment;
19994 bool isSEXTLoad = false, isNonExt;
19995 bool IsMasked = false;
19996 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19997 VT = LD->getMemoryVT();
19998 Ptr = LD->getBasePtr();
19999 Alignment = LD->getAlign();
20000 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20001 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20002 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20003 VT = ST->getMemoryVT();
20004 Ptr = ST->getBasePtr();
20005 Alignment = ST->getAlign();
20006 isNonExt = !ST->isTruncatingStore();
20007 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
20008 VT = LD->getMemoryVT();
20009 Ptr = LD->getBasePtr();
20010 Alignment = LD->getAlign();
20011 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20012 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20013 IsMasked = true;
20014 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
20015 VT = ST->getMemoryVT();
20016 Ptr = ST->getBasePtr();
20017 Alignment = ST->getAlign();
20018 isNonExt = !ST->isTruncatingStore();
20019 IsMasked = true;
20020 } else
20021 return false;
20022
20023 if (Subtarget->isThumb1Only()) {
20024 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
20025 // must be non-extending/truncating, i32, with an offset of 4.
20026 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
20027 if (Op->getOpcode() != ISD::ADD || !isNonExt)
20028 return false;
20029 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20030 if (!RHS || RHS->getZExtValue() != 4)
20031 return false;
20032 if (Alignment < Align(4))
20033 return false;
20034
20035 Offset = Op->getOperand(1);
20036 Base = Op->getOperand(0);
20037 AM = ISD::POST_INC;
20038 return true;
20039 }
20040
20041 bool isInc;
20042 bool isLegal = false;
20043 if (VT.isVector())
20044 isLegal = Subtarget->hasMVEIntegerOps() &&
20045 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
20046 Subtarget->isLittle(), Base, Offset,
20047 isInc, DAG);
20048 else {
20049 if (Subtarget->isThumb2())
20050 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20051 isInc, DAG);
20052 else
20053 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20054 isInc, DAG);
20055 }
20056 if (!isLegal)
20057 return false;
20058
20059 if (Ptr != Base) {
20060 // Swap base ptr and offset to catch more post-index load / store when
20061 // it's legal. In Thumb2 mode, offset must be an immediate.
20062 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
20063 !Subtarget->isThumb2())
20064 std::swap(Base, Offset);
20065
20066 // Post-indexed load / store update the base pointer.
20067 if (Ptr != Base)
20068 return false;
20069 }
20070
20071 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
20072 return true;
20073}
20074
20075void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
20076 KnownBits &Known,
20077 const APInt &DemandedElts,
20078 const SelectionDAG &DAG,
20079 unsigned Depth) const {
20080 unsigned BitWidth = Known.getBitWidth();
20081 Known.resetAll();
20082 switch (Op.getOpcode()) {
20083 default: break;
20084 case ARMISD::ADDC:
20085 case ARMISD::ADDE:
20086 case ARMISD::SUBC:
20087 case ARMISD::SUBE:
20088 // Special cases when we convert a carry to a boolean.
20089 if (Op.getResNo() == 0) {
20090 SDValue LHS = Op.getOperand(0);
20091 SDValue RHS = Op.getOperand(1);
20092 // (ADDE 0, 0, C) will give us a single bit.
20093 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20094 isNullConstant(RHS)) {
20095 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20096 return;
20097 }
20098 }
20099 break;
20100 case ARMISD::CMOV: {
20101 // Bits are known zero/one if known on the LHS and RHS.
20102 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20103 if (Known.isUnknown())
20104 return;
20105
20106 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20107 Known = Known.intersectWith(KnownRHS);
20108 return;
20109 }
20110 case ISD::INTRINSIC_W_CHAIN: {
20111 Intrinsic::ID IntID =
20112 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20113 switch (IntID) {
20114 default: return;
20115 case Intrinsic::arm_ldaex:
20116 case Intrinsic::arm_ldrex: {
20117 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20118 unsigned MemBits = VT.getScalarSizeInBits();
20119 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20120 return;
20121 }
20122 }
20123 }
20124 case ARMISD::BFI: {
20125 // Conservatively, we can recurse down the first operand
20126 // and just mask out all affected bits.
20127 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20128
20129 // The operand to BFI is already a mask suitable for removing the bits it
20130 // sets.
20131 const APInt &Mask = Op.getConstantOperandAPInt(2);
20132 Known.Zero &= Mask;
20133 Known.One &= Mask;
20134 return;
20135 }
20136 case ARMISD::VGETLANEs:
20137 case ARMISD::VGETLANEu: {
20138 const SDValue &SrcSV = Op.getOperand(0);
20139 EVT VecVT = SrcSV.getValueType();
20140 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20141 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20142 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20143 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20144 "VGETLANE index out of bounds");
20145 unsigned Idx = Pos->getZExtValue();
20146 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20147 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20148
20149 EVT VT = Op.getValueType();
20150 const unsigned DstSz = VT.getScalarSizeInBits();
20151 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20152 (void)SrcSz;
20153 assert(SrcSz == Known.getBitWidth());
20154 assert(DstSz > SrcSz);
20155 if (Op.getOpcode() == ARMISD::VGETLANEs)
20156 Known = Known.sext(DstSz);
20157 else {
20158 Known = Known.zext(DstSz);
20159 }
20160 assert(DstSz == Known.getBitWidth());
20161 break;
20162 }
20163 case ARMISD::VMOVrh: {
20164 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20165 assert(KnownOp.getBitWidth() == 16);
20166 Known = KnownOp.zext(32);
20167 break;
20168 }
20169 case ARMISD::CSINC:
20170 case ARMISD::CSINV:
20171 case ARMISD::CSNEG: {
20172 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20173 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20174
20175 // The result is either:
20176 // CSINC: KnownOp0 or KnownOp1 + 1
20177 // CSINV: KnownOp0 or ~KnownOp1
20178 // CSNEG: KnownOp0 or KnownOp1 * -1
20179 if (Op.getOpcode() == ARMISD::CSINC)
20180 KnownOp1 = KnownBits::computeForAddSub(
20181 /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownOp1,
20182 KnownBits::makeConstant(APInt(32, 1)));
20183 else if (Op.getOpcode() == ARMISD::CSINV)
20184 std::swap(KnownOp1.Zero, KnownOp1.One);
20185 else if (Op.getOpcode() == ARMISD::CSNEG)
20186 KnownOp1 = KnownBits::mul(
20187 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20188
20189 Known = KnownOp0.intersectWith(KnownOp1);
20190 break;
20191 }
20192 }
20193}
20194
20195bool ARMTargetLowering::targetShrinkDemandedConstant(
20196 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20197 TargetLoweringOpt &TLO) const {
20198 // Delay optimization, so we don't have to deal with illegal types, or block
20199 // optimizations.
20200 if (!TLO.LegalOps)
20201 return false;
20202
20203 // Only optimize AND for now.
20204 if (Op.getOpcode() != ISD::AND)
20205 return false;
20206
20207 EVT VT = Op.getValueType();
20208
20209 // Ignore vectors.
20210 if (VT.isVector())
20211 return false;
20212
20213 assert(VT == MVT::i32 && "Unexpected integer type");
20214
20215 // Make sure the RHS really is a constant.
20216 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20217 if (!C)
20218 return false;
20219
20220 unsigned Mask = C->getZExtValue();
20221
20222 unsigned Demanded = DemandedBits.getZExtValue();
20223 unsigned ShrunkMask = Mask & Demanded;
20224 unsigned ExpandedMask = Mask | ~Demanded;
20225
20226 // If the mask is all zeros, let the target-independent code replace the
20227 // result with zero.
20228 if (ShrunkMask == 0)
20229 return false;
20230
20231 // If the mask is all ones, erase the AND. (Currently, the target-independent
20232 // code won't do this, so we have to do it explicitly to avoid an infinite
20233 // loop in obscure cases.)
20234 if (ExpandedMask == ~0U)
20235 return TLO.CombineTo(Op, Op.getOperand(0));
20236
20237 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20238 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20239 };
20240 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20241 if (NewMask == Mask)
20242 return true;
20243 SDLoc DL(Op);
20244 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20245 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20246 return TLO.CombineTo(Op, NewOp);
20247 };
20248
20249 // Prefer uxtb mask.
20250 if (IsLegalMask(0xFF))
20251 return UseMask(0xFF);
20252
20253 // Prefer uxth mask.
20254 if (IsLegalMask(0xFFFF))
20255 return UseMask(0xFFFF);
20256
20257 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20258 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20259 if (ShrunkMask < 256)
20260 return UseMask(ShrunkMask);
20261
20262 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20263 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20264 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20265 return UseMask(ExpandedMask);
20266
20267 // Potential improvements:
20268 //
20269 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20270 // We could try to prefer Thumb1 immediates which can be lowered to a
20271 // two-instruction sequence.
20272 // We could try to recognize more legal ARM/Thumb2 immediates here.
20273
20274 return false;
20275}
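// Worked example (editorial): take (and x, 0x0F) where the user demands only
// bits 0-3 and bit 8. Then ShrunkMask == 0x0F and ExpandedMask == 0xFFFFFEFF,
// so IsLegalMask(0xFF) holds and the node is rewritten to (and x, 0xFF), which
// selects to a single UXTB; the extra bits 4-7 it keeps are never demanded.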
20276
20277bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20278 SDValue Op, const APInt &OriginalDemandedBits,
20279 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20280 unsigned Depth) const {
20281 unsigned Opc = Op.getOpcode();
20282
20283 switch (Opc) {
20284 case ARMISD::ASRL:
20285 case ARMISD::LSRL: {
20286 // If this is result 0 and the other result is unused, see if the demand
20287 // bits allow us to shrink this long shift into a standard small shift in
20288 // the opposite direction.
20289 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20290 isa<ConstantSDNode>(Op->getOperand(2))) {
20291 unsigned ShAmt = Op->getConstantOperandVal(2);
20292 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20293 << (32 - ShAmt)))
20294 return TLO.CombineTo(
20295 Op, TLO.DAG.getNode(
20296 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20297 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20298 }
20299 break;
20300 }
20301 case ARMISD::VBICIMM: {
20302 SDValue Op0 = Op.getOperand(0);
20303 unsigned ModImm = Op.getConstantOperandVal(1);
20304 unsigned EltBits = 0;
20305 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20306 if ((OriginalDemandedBits & Mask) == 0)
20307 return TLO.CombineTo(Op, Op0);
20308 }
20309 }
20310
20311 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20312 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20313}
20314
20315//===----------------------------------------------------------------------===//
20316// ARM Inline Assembly Support
20317//===----------------------------------------------------------------------===//
20318
20319bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20320 // Looking for "rev" which is V6+.
20321 if (!Subtarget->hasV6Ops())
20322 return false;
20323
20324 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20325 StringRef AsmStr = IA->getAsmString();
20326 SmallVector<StringRef, 4> AsmPieces;
20327 SplitString(AsmStr, AsmPieces, ";\n");
20328
20329 switch (AsmPieces.size()) {
20330 default: return false;
20331 case 1:
20332 AsmStr = AsmPieces[0];
20333 AsmPieces.clear();
20334 SplitString(AsmStr, AsmPieces, " \t,");
20335
20336 // rev $0, $1
20337 if (AsmPieces.size() == 3 &&
20338 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20339 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20340 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20341 if (Ty && Ty->getBitWidth() == 32)
20342 return IntrinsicLowering::LowerToByteSwap(CI);
20343 }
20344 break;
20345 }
20346
20347 return false;
20348}
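// Illustrative sketch (hypothetical user code, not part of this file): the
// pattern recognised above corresponds to source such as
//   uint32_t r;
//   __asm__("rev %0, %1" : "=l"(r) : "l"(x));
// which, on v6+ targets, is replaced by the llvm.bswap.i32 intrinsic so the
// optimiser can reason about it like any other byte swap.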
20349
20350const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20351 // At this point, we have to lower this constraint to something else, so we
20352 // lower it to an "r" or "w". However, by doing this we will force the result
20353 // to be in register, while the X constraint is much more permissive.
20354 //
20355 // Although we are correct (we are free to emit anything, without
20356 // constraints), we might break use cases that would expect us to be more
20357 // efficient and emit something else.
20358 if (!Subtarget->hasVFP2Base())
20359 return "r";
20360 if (ConstraintVT.isFloatingPoint())
20361 return "w";
20362 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20363 (ConstraintVT.getSizeInBits() == 64 ||
20364 ConstraintVT.getSizeInBits() == 128))
20365 return "w";
20366
20367 return "r";
20368}
20369
20370/// getConstraintType - Given a constraint letter, return the type of
20371/// constraint it is for this target.
20372ARMTargetLowering::ConstraintType
20373ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20374 unsigned S = Constraint.size();
20375 if (S == 1) {
20376 switch (Constraint[0]) {
20377 default: break;
20378 case 'l': return C_RegisterClass;
20379 case 'w': return C_RegisterClass;
20380 case 'h': return C_RegisterClass;
20381 case 'x': return C_RegisterClass;
20382 case 't': return C_RegisterClass;
20383 case 'j': return C_Immediate; // Constant for movw.
20384 // An address with a single base register. Due to the way we
20385 // currently handle addresses it is the same as an 'r' memory constraint.
20386 case 'Q': return C_Memory;
20387 }
20388 } else if (S == 2) {
20389 switch (Constraint[0]) {
20390 default: break;
20391 case 'T': return C_RegisterClass;
20392 // All 'U+' constraints are addresses.
20393 case 'U': return C_Memory;
20394 }
20395 }
20396 return TargetLowering::getConstraintType(Constraint);
20397}
20398
20399/// Examine constraint type and operand type and determine a weight value.
20400/// This object must already have been set up with the operand type
20401/// and the current alternative constraint selected.
20402TargetLowering::ConstraintWeight
20403ARMTargetLowering::getSingleConstraintMatchWeight(
20404 AsmOperandInfo &info, const char *constraint) const {
20405 ConstraintWeight weight = CW_Invalid;
20406 Value *CallOperandVal = info.CallOperandVal;
20407 // If we don't have a value, we can't do a match,
20408 // but allow it at the lowest weight.
20409 if (!CallOperandVal)
20410 return CW_Default;
20411 Type *type = CallOperandVal->getType();
20412 // Look at the constraint type.
20413 switch (*constraint) {
20414 default:
20415 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20416 break;
20417 case 'l':
20418 if (type->isIntegerTy()) {
20419 if (Subtarget->isThumb())
20420 weight = CW_SpecificReg;
20421 else
20422 weight = CW_Register;
20423 }
20424 break;
20425 case 'w':
20426 if (type->isFloatingPointTy())
20427 weight = CW_Register;
20428 break;
20429 }
20430 return weight;
20431}
20432
20433using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20434
20435RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20436 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20437 switch (Constraint.size()) {
20438 case 1:
20439 // GCC ARM Constraint Letters
20440 switch (Constraint[0]) {
20441 case 'l': // Low regs or general regs.
20442 if (Subtarget->isThumb())
20443 return RCPair(0U, &ARM::tGPRRegClass);
20444 return RCPair(0U, &ARM::GPRRegClass);
20445 case 'h': // High regs or no regs.
20446 if (Subtarget->isThumb())
20447 return RCPair(0U, &ARM::hGPRRegClass);
20448 break;
20449 case 'r':
20450 if (Subtarget->isThumb1Only())
20451 return RCPair(0U, &ARM::tGPRRegClass);
20452 return RCPair(0U, &ARM::GPRRegClass);
20453 case 'w':
20454 if (VT == MVT::Other)
20455 break;
20456 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20457 return RCPair(0U, &ARM::SPRRegClass);
20458 if (VT.getSizeInBits() == 64)
20459 return RCPair(0U, &ARM::DPRRegClass);
20460 if (VT.getSizeInBits() == 128)
20461 return RCPair(0U, &ARM::QPRRegClass);
20462 break;
20463 case 'x':
20464 if (VT == MVT::Other)
20465 break;
20466 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20467 return RCPair(0U, &ARM::SPR_8RegClass);
20468 if (VT.getSizeInBits() == 64)
20469 return RCPair(0U, &ARM::DPR_8RegClass);
20470 if (VT.getSizeInBits() == 128)
20471 return RCPair(0U, &ARM::QPR_8RegClass);
20472 break;
20473 case 't':
20474 if (VT == MVT::Other)
20475 break;
20476 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20477 return RCPair(0U, &ARM::SPRRegClass);
20478 if (VT.getSizeInBits() == 64)
20479 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20480 if (VT.getSizeInBits() == 128)
20481 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20482 break;
20483 }
20484 break;
20485
20486 case 2:
20487 if (Constraint[0] == 'T') {
20488 switch (Constraint[1]) {
20489 default:
20490 break;
20491 case 'e':
20492 return RCPair(0U, &ARM::tGPREvenRegClass);
20493 case 'o':
20494 return RCPair(0U, &ARM::tGPROddRegClass);
20495 }
20496 }
20497 break;
20498
20499 default:
20500 break;
20501 }
20502
20503 if (StringRef("{cc}").equals_insensitive(Constraint))
20504 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20505
20506 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20507}
20508
20509/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20510/// vector. If it is invalid, don't add anything to Ops.
20511void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20512 StringRef Constraint,
20513 std::vector<SDValue> &Ops,
20514 SelectionDAG &DAG) const {
20515 SDValue Result;
20516
20517 // Currently only support length 1 constraints.
20518 if (Constraint.size() != 1)
20519 return;
20520
20521 char ConstraintLetter = Constraint[0];
20522 switch (ConstraintLetter) {
20523 default: break;
20524 case 'j':
20525 case 'I': case 'J': case 'K': case 'L':
20526 case 'M': case 'N': case 'O':
20527 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20528 if (!C)
20529 return;
20530
20531 int64_t CVal64 = C->getSExtValue();
20532 int CVal = (int) CVal64;
20533 // None of these constraints allow values larger than 32 bits. Check
20534 // that the value fits in an int.
20535 if (CVal != CVal64)
20536 return;
20537
20538 switch (ConstraintLetter) {
20539 case 'j':
20540 // Constant suitable for movw, must be between 0 and
20541 // 65535.
20542 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20543 if (CVal >= 0 && CVal <= 65535)
20544 break;
20545 return;
20546 case 'I':
20547 if (Subtarget->isThumb1Only()) {
20548 // This must be a constant between 0 and 255, for ADD
20549 // immediates.
20550 if (CVal >= 0 && CVal <= 255)
20551 break;
20552 } else if (Subtarget->isThumb2()) {
20553 // A constant that can be used as an immediate value in a
20554 // data-processing instruction.
20555 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20556 break;
20557 } else {
20558 // A constant that can be used as an immediate value in a
20559 // data-processing instruction.
20560 if (ARM_AM::getSOImmVal(CVal) != -1)
20561 break;
20562 }
20563 return;
20564
20565 case 'J':
20566 if (Subtarget->isThumb1Only()) {
20567 // This must be a constant between -255 and -1, for negated ADD
20568 // immediates. This can be used in GCC with an "n" modifier that
20569 // prints the negated value, for use with SUB instructions. It is
20570 // not useful otherwise but is implemented for compatibility.
20571 if (CVal >= -255 && CVal <= -1)
20572 break;
20573 } else {
20574 // This must be a constant between -4095 and 4095. It is not clear
20575 // what this constraint is intended for. Implemented for
20576 // compatibility with GCC.
20577 if (CVal >= -4095 && CVal <= 4095)
20578 break;
20579 }
20580 return;
20581
20582 case 'K':
20583 if (Subtarget->isThumb1Only()) {
20584 // A 32-bit value where only one byte has a nonzero value. Exclude
20585 // zero to match GCC. This constraint is used by GCC internally for
20586 // constants that can be loaded with a move/shift combination.
20587 // It is not useful otherwise but is implemented for compatibility.
20588 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20589 break;
20590 } else if (Subtarget->isThumb2()) {
20591 // A constant whose bitwise inverse can be used as an immediate
20592 // value in a data-processing instruction. This can be used in GCC
20593 // with a "B" modifier that prints the inverted value, for use with
20594 // BIC and MVN instructions. It is not useful otherwise but is
20595 // implemented for compatibility.
20596 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20597 break;
20598 } else {
20599 // A constant whose bitwise inverse can be used as an immediate
20600 // value in a data-processing instruction. This can be used in GCC
20601 // with a "B" modifier that prints the inverted value, for use with
20602 // BIC and MVN instructions. It is not useful otherwise but is
20603 // implemented for compatibility.
20604 if (ARM_AM::getSOImmVal(~CVal) != -1)
20605 break;
20606 }
20607 return;
20608
20609 case 'L':
20610 if (Subtarget->isThumb1Only()) {
20611 // This must be a constant between -7 and 7,
20612 // for 3-operand ADD/SUB immediate instructions.
20613 if (CVal >= -7 && CVal < 7)
20614 break;
20615 } else if (Subtarget->isThumb2()) {
20616 // A constant whose negation can be used as an immediate value in a
20617 // data-processing instruction. This can be used in GCC with an "n"
20618 // modifier that prints the negated value, for use with SUB
20619 // instructions. It is not useful otherwise but is implemented for
20620 // compatibility.
20621 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20622 break;
20623 } else {
20624 // A constant whose negation can be used as an immediate value in a
20625 // data-processing instruction. This can be used in GCC with an "n"
20626 // modifier that prints the negated value, for use with SUB
20627 // instructions. It is not useful otherwise but is implemented for
20628 // compatibility.
20629 if (ARM_AM::getSOImmVal(-CVal) != -1)
20630 break;
20631 }
20632 return;
20633
20634 case 'M':
20635 if (Subtarget->isThumb1Only()) {
20636 // This must be a multiple of 4 between 0 and 1020, for
20637 // ADD sp + immediate.
20638 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20639 break;
20640 } else {
20641 // A power of two or a constant between 0 and 32. This is used in
20642 // GCC for the shift amount on shifted register operands, but it is
20643 // useful in general for any shift amounts.
20644 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20645 break;
20646 }
20647 return;
20648
20649 case 'N':
20650 if (Subtarget->isThumb1Only()) {
20651 // This must be a constant between 0 and 31, for shift amounts.
20652 if (CVal >= 0 && CVal <= 31)
20653 break;
20654 }
20655 return;
20656
20657 case 'O':
20658 if (Subtarget->isThumb1Only()) {
20659 // This must be a multiple of 4 between -508 and 508, for
20660 // ADD/SUB sp = sp + immediate.
20661 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20662 break;
20663 }
20664 return;
20665 }
20666 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20667 break;
20668 }
20669
20670 if (Result.getNode()) {
20671 Ops.push_back(Result);
20672 return;
20673 }
20674 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20675}
20676
20677static RTLIB::Libcall getDivRemLibcall(
20678 const SDNode *N, MVT::SimpleValueType SVT) {
20679 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20680 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20681 "Unhandled Opcode in getDivRemLibcall");
20682 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20683 N->getOpcode() == ISD::SREM;
20684 RTLIB::Libcall LC;
20685 switch (SVT) {
20686 default: llvm_unreachable("Unexpected request for libcall!");
20687 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20688 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20689 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20690 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20691 }
20692 return LC;
20693}
20694
20695static TargetLowering::ArgListTy getDivRemArgList(
20696 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20697 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20698 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20699 "Unhandled Opcode in getDivRemArgList");
20700 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20701 N->getOpcode() == ISD::SREM;
20702 TargetLowering::ArgListTy Args;
20703 TargetLowering::ArgListEntry Entry;
20704 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20705 EVT ArgVT = N->getOperand(i).getValueType();
20706 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20707 Entry.Node = N->getOperand(i);
20708 Entry.Ty = ArgTy;
20709 Entry.IsSExt = isSigned;
20710 Entry.IsZExt = !isSigned;
20711 Args.push_back(Entry);
20712 }
20713 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20714 std::swap(Args[0], Args[1]);
20715 return Args;
20716}
20717
20718SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20719 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20720 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20721 Subtarget->isTargetWindows()) &&
20722 "Register-based DivRem lowering only");
20723 unsigned Opcode = Op->getOpcode();
20724 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20725 "Invalid opcode for Div/Rem lowering");
20726 bool isSigned = (Opcode == ISD::SDIVREM);
20727 EVT VT = Op->getValueType(0);
20728 SDLoc dl(Op);
20729
20730 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20731 SmallVector<SDValue> Result;
20732 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20733 SDValue Res0 =
20734 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20735 SDValue Res1 =
20736 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20737 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20738 {Res0, Res1});
20739 }
20740 }
20741
20742 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20743
20744 // If the target has hardware divide, use divide + multiply + subtract:
20745 // div = a / b
20746 // rem = a - b * div
20747 // return {div, rem}
20748 // This should be lowered into UDIV/SDIV + MLS later on.
20749 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20750 : Subtarget->hasDivideInARMMode();
20751 if (hasDivide && Op->getValueType(0).isSimple() &&
20752 Op->getSimpleValueType(0) == MVT::i32) {
20753 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20754 const SDValue Dividend = Op->getOperand(0);
20755 const SDValue Divisor = Op->getOperand(1);
20756 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20757 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20758 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20759
20760 SDValue Values[2] = {Div, Rem};
20761 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20762 }
20763
20764 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20765 VT.getSimpleVT().SimpleTy);
20766 SDValue InChain = DAG.getEntryNode();
20767
20768 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20769 DAG.getContext(),
20770 Subtarget);
20771
20774
20775 Type *RetTy = StructType::get(Ty, Ty);
20776
20777 if (Subtarget->isTargetWindows())
20778 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20779
20780 TargetLowering::CallLoweringInfo CLI(DAG);
20781 CLI.setDebugLoc(dl).setChain(InChain)
20782 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20783 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
20784
20785 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20786 return CallInfo.first;
20787}
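// Illustrative sketch (editorial): with hardware divide, an i32 sdivrem such as
//   %q = sdiv i32 %a, %b ;  %r = srem i32 %a, %b
// becomes the three-node sequence built above,
//   div = a / b;  mul = div * b;  rem = a - mul;
// which later selects to SDIV followed by MLS. Without hardware divide, the
// libcall path below is taken instead (e.g. __aeabi_idivmod on AEABI targets).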
20788
20789// Lowers REM using divmod helpers
20790// see RTABI section 4.2/4.3
20791SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20792 EVT VT = N->getValueType(0);
20793
20794 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20795 SmallVector<SDValue> Result;
20796 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20797 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20798 Result[0], Result[1]);
20799 }
20800
20801 // Build return types (div and rem)
20802 std::vector<Type*> RetTyParams;
20803 Type *RetTyElement;
20804
20805 switch (VT.getSimpleVT().SimpleTy) {
20806 default: llvm_unreachable("Unexpected request for libcall!");
20807 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20808 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20809 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20810 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20811 }
20812
20813 RetTyParams.push_back(RetTyElement);
20814 RetTyParams.push_back(RetTyElement);
20815 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20816 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20817
20818 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20819 SimpleTy);
20820 SDValue InChain = DAG.getEntryNode();
20821 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20822 Subtarget);
20823 bool isSigned = N->getOpcode() == ISD::SREM;
20824 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20825 getPointerTy(DAG.getDataLayout()));
20826
20827 if (Subtarget->isTargetWindows())
20828 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20829
20830 // Lower call
20831 CallLoweringInfo CLI(DAG);
20832 CLI.setChain(InChain)
20833 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20834 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
20835 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20836
20837 // Return second (rem) result operand (first contains div)
20838 SDNode *ResNode = CallResult.first.getNode();
20839 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20840 return ResNode->getOperand(1);
20841}
20842
20843SDValue
20844ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20845 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20846 SDLoc DL(Op);
20847
20848 // Get the inputs.
20849 SDValue Chain = Op.getOperand(0);
20850 SDValue Size = Op.getOperand(1);
20851
20852 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20853 "no-stack-arg-probe")) {
20854 MaybeAlign Align =
20855 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20856 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20857 Chain = SP.getValue(1);
20858 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20859 if (Align)
20860 SP =
20861 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20862 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20863 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20864 SDValue Ops[2] = { SP, Chain };
20865 return DAG.getMergeValues(Ops, DL);
20866 }
20867
20868 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20869 DAG.getConstant(2, DL, MVT::i32));
20870
20871 SDValue Glue;
20872 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20873 Glue = Chain.getValue(1);
20874
20875 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20876 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20877
20878 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20879 Chain = NewSP.getValue(1);
20880
20881 SDValue Ops[2] = { NewSP, Chain };
20882 return DAG.getMergeValues(Ops, DL);
20883}
20884
20885SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20886 bool IsStrict = Op->isStrictFPOpcode();
20887 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20888 const unsigned DstSz = Op.getValueType().getSizeInBits();
20889 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20890 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20891 "Unexpected type for custom-lowering FP_EXTEND");
20892
20893 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20894 "With both FP DP and 16, any FP conversion is legal!");
20895
20896 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20897 "With FP16, 16 to 32 conversion is legal!");
20898
20899 // Converting from 32 -> 64 is valid if we have FP64.
20900 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20901 // FIXME: Remove this when we have strict fp instruction selection patterns
20902 if (IsStrict) {
20903 SDLoc Loc(Op);
20904 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20905 Loc, Op.getValueType(), SrcVal);
20906 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20907 }
20908 return Op;
20909 }
20910
20911 // Either we are converting from 16 -> 64 without FP16 and/or without
20912 // double-precision FP (or without Armv8-fp), so we must do it in two
20913 // steps.
20914 // Or we are converting from 32 -> 64 without double-precision FP, or from
20915 // 16 -> 32 without FP16; in both cases we must make a libcall.
20916 SDLoc Loc(Op);
20917 RTLIB::Libcall LC;
20918 MakeLibCallOptions CallOptions;
20919 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20920 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20921 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20922 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20923 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20924 if (Supported) {
20925 if (IsStrict) {
20926 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20927 {DstVT, MVT::Other}, {Chain, SrcVal});
20928 Chain = SrcVal.getValue(1);
20929 } else {
20930 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20931 }
20932 } else {
20933 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20934 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20935 "Unexpected type for custom-lowering FP_EXTEND");
20936 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20937 Loc, Chain);
20938 }
20939 }
20940
20941 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20942}
20943
20944SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20945 bool IsStrict = Op->isStrictFPOpcode();
20946
20947 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20948 EVT SrcVT = SrcVal.getValueType();
20949 EVT DstVT = Op.getValueType();
20950 const unsigned DstSz = Op.getValueType().getSizeInBits();
20951 const unsigned SrcSz = SrcVT.getSizeInBits();
20952 (void)DstSz;
20953 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20954 "Unexpected type for custom-lowering FP_ROUND");
20955
20956 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20957 "With both FP DP and 16, any FP conversion is legal!");
20958
20959 SDLoc Loc(Op);
20960
20961 // A single instruction handles 32 -> 16 if FP16 is available.
20962 if (SrcSz == 32 && Subtarget->hasFP16())
20963 return Op;
20964
20965 // Lib call from 32 -> 16 / 64 -> [32, 16]
20966 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20967 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20968 "Unexpected type for custom-lowering FP_ROUND");
20969 MakeLibCallOptions CallOptions;
20970 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20971 SDValue Result;
20972 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20973 Loc, Chain);
20974 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20975}
20976
20977bool
20978ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20979 // The ARM target isn't yet aware of offsets.
20980 return false;
20981}
20982
20983bool ARM::isBitFieldInvertedMask(unsigned v) {
20984 if (v == 0xffffffff)
20985 return false;
20986
20987 // There can be 1's on either or both "outsides"; all the "inside"
20988 // bits must be 0's.
20989 return isShiftedMask_32(~v);
20990}
20991
20992/// isFPImmLegal - Returns true if the target can instruction select the
20993/// specified FP immediate natively. If false, the legalizer will
20994/// materialize the FP immediate as a load from a constant pool.
20995bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20996 bool ForCodeSize) const {
20997 if (!Subtarget->hasVFP3Base())
20998 return false;
20999 if (VT == MVT::f16 && Subtarget->hasFullFP16())
21000 return ARM_AM::getFP16Imm(Imm) != -1;
21001 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
21002 ARM_AM::getFP32FP16Imm(Imm) != -1)
21003 return true;
21004 if (VT == MVT::f32)
21005 return ARM_AM::getFP32Imm(Imm) != -1;
21006 if (VT == MVT::f64 && Subtarget->hasFP64())
21007 return ARM_AM::getFP64Imm(Imm) != -1;
21008 return false;
21009}
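// Illustration (editorial sketch): the VMOV 8-bit immediate encoding checked
// above covers values of the form +/- (16..31)/16 * 2^exp for a small
// exponent range, so constants such as 0.5, 1.0, -2.0 or 3.875 are legal FP
// immediates (subject to the subtarget checks), while 0.0, 0.1 or 1e10 are
// not and will be materialized from a constant pool instead.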
21010
21011/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
21012/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
21013/// specified in the intrinsic calls.
21014bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
21015 const CallInst &I,
21016 MachineFunction &MF,
21017 unsigned Intrinsic) const {
21018 switch (Intrinsic) {
21019 case Intrinsic::arm_neon_vld1:
21020 case Intrinsic::arm_neon_vld2:
21021 case Intrinsic::arm_neon_vld3:
21022 case Intrinsic::arm_neon_vld4:
21023 case Intrinsic::arm_neon_vld2lane:
21024 case Intrinsic::arm_neon_vld3lane:
21025 case Intrinsic::arm_neon_vld4lane:
21026 case Intrinsic::arm_neon_vld2dup:
21027 case Intrinsic::arm_neon_vld3dup:
21028 case Intrinsic::arm_neon_vld4dup: {
21029 Info.opc = ISD::INTRINSIC_W_CHAIN;
21030 // Conservatively set memVT to the entire set of vectors loaded.
21031 auto &DL = I.getDataLayout();
21032 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21033 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21034 Info.ptrVal = I.getArgOperand(0);
21035 Info.offset = 0;
21036 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21037 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21038 // volatile loads with NEON intrinsics not supported
21039 Info.flags = MachineMemOperand::MOLoad;
21040 return true;
21041 }
21042 case Intrinsic::arm_neon_vld1x2:
21043 case Intrinsic::arm_neon_vld1x3:
21044 case Intrinsic::arm_neon_vld1x4: {
21045 Info.opc = ISD::INTRINSIC_W_CHAIN;
21046 // Conservatively set memVT to the entire set of vectors loaded.
21047 auto &DL = I.getDataLayout();
21048 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21049 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21050 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
21051 Info.offset = 0;
21052 Info.align.reset();
21053 // volatile loads with NEON intrinsics not supported
21054 Info.flags = MachineMemOperand::MOLoad;
21055 return true;
21056 }
21057 case Intrinsic::arm_neon_vst1:
21058 case Intrinsic::arm_neon_vst2:
21059 case Intrinsic::arm_neon_vst3:
21060 case Intrinsic::arm_neon_vst4:
21061 case Intrinsic::arm_neon_vst2lane:
21062 case Intrinsic::arm_neon_vst3lane:
21063 case Intrinsic::arm_neon_vst4lane: {
21064 Info.opc = ISD::INTRINSIC_VOID;
21065 // Conservatively set memVT to the entire set of vectors stored.
21066 auto &DL = I.getDataLayout();
21067 unsigned NumElts = 0;
21068 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21069 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21070 if (!ArgTy->isVectorTy())
21071 break;
21072 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21073 }
21074 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21075 Info.ptrVal = I.getArgOperand(0);
21076 Info.offset = 0;
21077 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21078 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21079 // volatile stores with NEON intrinsics not supported
21080 Info.flags = MachineMemOperand::MOStore;
21081 return true;
21082 }
21083 case Intrinsic::arm_neon_vst1x2:
21084 case Intrinsic::arm_neon_vst1x3:
21085 case Intrinsic::arm_neon_vst1x4: {
21086 Info.opc = ISD::INTRINSIC_VOID;
21087 // Conservatively set memVT to the entire set of vectors stored.
21088 auto &DL = I.getDataLayout();
21089 unsigned NumElts = 0;
21090 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21091 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21092 if (!ArgTy->isVectorTy())
21093 break;
21094 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21095 }
21096 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21097 Info.ptrVal = I.getArgOperand(0);
21098 Info.offset = 0;
21099 Info.align.reset();
21100 // volatile stores with NEON intrinsics not supported
21101 Info.flags = MachineMemOperand::MOStore;
21102 return true;
21103 }
21104 case Intrinsic::arm_mve_vld2q:
21105 case Intrinsic::arm_mve_vld4q: {
21106 Info.opc = ISD::INTRINSIC_W_CHAIN;
21107 // Conservatively set memVT to the entire set of vectors loaded.
21108 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21109 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21110 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21111 Info.ptrVal = I.getArgOperand(0);
21112 Info.offset = 0;
21113 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21114 // volatile loads with MVE intrinsics not supported
21115 Info.flags = MachineMemOperand::MOLoad;
21116 return true;
21117 }
21118 case Intrinsic::arm_mve_vst2q:
21119 case Intrinsic::arm_mve_vst4q: {
21120 Info.opc = ISD::INTRINSIC_VOID;
21121 // Conservatively set memVT to the entire set of vectors stored.
21122 Type *VecTy = I.getArgOperand(1)->getType();
21123 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21124 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21125 Info.ptrVal = I.getArgOperand(0);
21126 Info.offset = 0;
21127 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21128 // volatile stores with MVE intrinsics not supported
21129 Info.flags = MachineMemOperand::MOStore;
21130 return true;
21131 }
21132 case Intrinsic::arm_mve_vldr_gather_base:
21133 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21134 Info.opc = ISD::INTRINSIC_W_CHAIN;
21135 Info.ptrVal = nullptr;
21136 Info.memVT = MVT::getVT(I.getType());
21137 Info.align = Align(1);
21138 Info.flags |= MachineMemOperand::MOLoad;
21139 return true;
21140 }
21141 case Intrinsic::arm_mve_vldr_gather_base_wb:
21142 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21143 Info.opc = ISD::INTRINSIC_W_CHAIN;
21144 Info.ptrVal = nullptr;
21145 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21146 Info.align = Align(1);
21147 Info.flags |= MachineMemOperand::MOLoad;
21148 return true;
21149 }
21150 case Intrinsic::arm_mve_vldr_gather_offset:
21151 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21152 Info.opc = ISD::INTRINSIC_W_CHAIN;
21153 Info.ptrVal = nullptr;
21154 MVT DataVT = MVT::getVT(I.getType());
21155 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21156 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21157 DataVT.getVectorNumElements());
21158 Info.align = Align(1);
21159 Info.flags |= MachineMemOperand::MOLoad;
21160 return true;
21161 }
21162 case Intrinsic::arm_mve_vstr_scatter_base:
21163 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21164 Info.opc = ISD::INTRINSIC_VOID;
21165 Info.ptrVal = nullptr;
21166 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21167 Info.align = Align(1);
21168 Info.flags |= MachineMemOperand::MOStore;
21169 return true;
21170 }
21171 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21172 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21173 Info.opc = ISD::INTRINSIC_W_CHAIN;
21174 Info.ptrVal = nullptr;
21175 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21176 Info.align = Align(1);
21177 Info.flags |= MachineMemOperand::MOStore;
21178 return true;
21179 }
21180 case Intrinsic::arm_mve_vstr_scatter_offset:
21181 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21182 Info.opc = ISD::INTRINSIC_VOID;
21183 Info.ptrVal = nullptr;
21184 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21185 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21186 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21187 DataVT.getVectorNumElements());
21188 Info.align = Align(1);
21189 Info.flags |= MachineMemOperand::MOStore;
21190 return true;
21191 }
21192 case Intrinsic::arm_ldaex:
21193 case Intrinsic::arm_ldrex: {
21194 auto &DL = I.getDataLayout();
21195 Type *ValTy = I.getParamElementType(0);
21196 Info.opc = ISD::INTRINSIC_W_CHAIN;
21197 Info.memVT = MVT::getVT(ValTy);
21198 Info.ptrVal = I.getArgOperand(0);
21199 Info.offset = 0;
21200 Info.align = DL.getABITypeAlign(ValTy);
21201 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
21202 return true;
21203 }
21204 case Intrinsic::arm_stlex:
21205 case Intrinsic::arm_strex: {
21206 auto &DL = I.getDataLayout();
21207 Type *ValTy = I.getParamElementType(1);
21208 Info.opc = ISD::INTRINSIC_W_CHAIN;
21209 Info.memVT = MVT::getVT(ValTy);
21210 Info.ptrVal = I.getArgOperand(1);
21211 Info.offset = 0;
21212 Info.align = DL.getABITypeAlign(ValTy);
21213 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
21214 return true;
21215 }
21216 case Intrinsic::arm_stlexd:
21217 case Intrinsic::arm_strexd:
21218 Info.opc = ISD::INTRINSIC_W_CHAIN;
21219 Info.memVT = MVT::i64;
21220 Info.ptrVal = I.getArgOperand(2);
21221 Info.offset = 0;
21222 Info.align = Align(8);
21223 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
21224 return true;
21225
21226 case Intrinsic::arm_ldaexd:
21227 case Intrinsic::arm_ldrexd:
21228 Info.opc = ISD::INTRINSIC_W_CHAIN;
21229 Info.memVT = MVT::i64;
21230 Info.ptrVal = I.getArgOperand(0);
21231 Info.offset = 0;
21232 Info.align = Align(8);
21233 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
21234 return true;
21235
21236 default:
21237 break;
21238 }
21239
21240 return false;
21241}
21242
21243/// Returns true if it is beneficial to convert a load of a constant
21244/// to just the constant itself.
21245bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21246 Type *Ty) const {
21247 assert(Ty->isIntegerTy());
21248
21249 unsigned Bits = Ty->getPrimitiveSizeInBits();
21250 if (Bits == 0 || Bits > 32)
21251 return false;
21252 return true;
21253}
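// Editorial note: in practice this means an i32 constant such as 0x12345678
// is rematerialized directly (e.g. with a movw/movt pair) rather than loaded
// from a constant pool, while wider integer constants keep the default
// treatment.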
21254
21255bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
21256 unsigned Index) const {
21257 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
21258 return false;
21259
21260 return (Index == 0 || Index == ResVT.getVectorNumElements());
21261}
21262
21263Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21264 ARM_MB::MemBOpt Domain) const {
21265 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21266
21267 // First, if the target has no DMB, see what fallback we can use.
21268 if (!Subtarget->hasDataBarrier()) {
21269 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21270 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21271 // here.
21272 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21273 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
21274 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21275 Builder.getInt32(0), Builder.getInt32(7),
21276 Builder.getInt32(10), Builder.getInt32(5)};
21277 return Builder.CreateCall(MCR, args);
21278 } else {
21279 // Instead of using barriers, atomic accesses on these subtargets use
21280 // libcalls.
21281 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21282 }
21283 } else {
21284 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
21285 // Only a full system barrier exists in the M-class architectures.
21286 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21287 Constant *CDomain = Builder.getInt32(Domain);
21288 return Builder.CreateCall(DMB, CDomain);
21289 }
21290}
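// Illustration (editorial sketch) of the two barrier forms selected above;
// the assembly shown is what the intrinsics eventually lower to:
//   with DMB support:   dmb ish                        ; @llvm.arm.dmb(ISH)
//   ARMv6 without DMB:  mcr p15, #0, rN, c7, c10, #5   ; CP15 barrier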
21291
21292// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21293Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21294 Instruction *Inst,
21295 AtomicOrdering Ord) const {
21296 switch (Ord) {
21297 case AtomicOrdering::NotAtomic:
21298 case AtomicOrdering::Unordered:
21299 llvm_unreachable("Invalid fence: unordered/non-atomic");
21300 case AtomicOrdering::Monotonic:
21301 case AtomicOrdering::Acquire:
21302 return nullptr; // Nothing to do
21303 case AtomicOrdering::SequentiallyConsistent:
21304 if (!Inst->hasAtomicStore())
21305 return nullptr; // Nothing to do
21306 [[fallthrough]];
21307 case AtomicOrdering::Release:
21308 case AtomicOrdering::AcquireRelease:
21309 if (Subtarget->preferISHSTBarriers())
21310 return makeDMB(Builder, ARM_MB::ISHST);
21311 // FIXME: add a comment with a link to documentation justifying this.
21312 else
21313 return makeDMB(Builder, ARM_MB::ISH);
21314 }
21315 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21316}
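// Editorial summary of the mapping implemented above (cf. the cited C++11
// mapping table): monotonic and acquire operations need no leading fence;
// release and acq_rel operations, and seq_cst operations that store, get a
// leading "dmb ish" (or "dmb ishst" when the subtarget prefers store-only
// barriers). The matching trailing fences are emitted by emitTrailingFence()
// below.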
21317
21318Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21319 Instruction *Inst,
21320 AtomicOrdering Ord) const {
21321 switch (Ord) {
21322 case AtomicOrdering::NotAtomic:
21323 case AtomicOrdering::Unordered:
21324 llvm_unreachable("Invalid fence: unordered/not-atomic");
21325 case AtomicOrdering::Monotonic:
21326 case AtomicOrdering::Release:
21327 return nullptr; // Nothing to do
21328 case AtomicOrdering::Acquire:
21329 case AtomicOrdering::AcquireRelease:
21330 case AtomicOrdering::SequentiallyConsistent:
21331 return makeDMB(Builder, ARM_MB::ISH);
21332 }
21333 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21334}
21335
21336// Loads and stores less than 64 bits are already atomic; ones above that
21337// are doomed anyway, so defer to the default libcall and blame the OS when
21338// things go wrong. Cortex-M doesn't have ldrexd/strexd though, so don't emit
21339// anything for those.
21340TargetLoweringBase::AtomicExpansionKind
21341ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
21342 bool has64BitAtomicStore;
21343 if (Subtarget->isMClass())
21344 has64BitAtomicStore = false;
21345 else if (Subtarget->isThumb())
21346 has64BitAtomicStore = Subtarget->hasV7Ops();
21347 else
21348 has64BitAtomicStore = Subtarget->hasV6Ops();
21349
21350 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21351 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21352 : AtomicExpansionKind::None;
21353}
21354
21355// Loads and stores less than 64 bits are already atomic; ones above that
21356// are doomed anyway, so defer to the default libcall and blame the OS when
21357// things go wrong. Cortex-M doesn't have ldrexd/strexd though, so don't emit
21358// anything for those.
21359// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21360// guarantee, see DDI0406C ARM architecture reference manual,
21361// sections A8.8.72-74 LDRD)
21362TargetLoweringBase::AtomicExpansionKind
21363ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
21364 bool has64BitAtomicLoad;
21365 if (Subtarget->isMClass())
21366 has64BitAtomicLoad = false;
21367 else if (Subtarget->isThumb())
21368 has64BitAtomicLoad = Subtarget->hasV7Ops();
21369 else
21370 has64BitAtomicLoad = Subtarget->hasV6Ops();
21371
21372 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21373 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21374 : AtomicExpansionKind::None;
21375}
21376
21377// For the real atomic operations, we have ldrex/strex up to 32 bits,
21378// and up to 64 bits on the non-M profiles
21379TargetLoweringBase::AtomicExpansionKind
21380ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21381 if (AI->isFloatingPointOperation())
21382 return AtomicExpansionKind::CmpXChg;
21383
21384 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21385 bool hasAtomicRMW;
21386 if (Subtarget->isMClass())
21387 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21388 else if (Subtarget->isThumb())
21389 hasAtomicRMW = Subtarget->hasV7Ops();
21390 else
21391 hasAtomicRMW = Subtarget->hasV6Ops();
21392 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21393 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21394 // implement atomicrmw without spilling. If the target address is also on
21395 // the stack and close enough to the spill slot, this can lead to a
21396 // situation where the monitor always gets cleared and the atomic operation
21397 // can never succeed. So at -O0 lower this operation to a CAS loop.
21398 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21399 return AtomicExpansionKind::CmpXChg;
21400 return AtomicExpansionKind::LLSC;
21401 }
21402 return AtomicExpansionKind::None;
21403}
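// Illustration (editorial sketch) of the LL/SC expansion selected above, for
// e.g. a 32-bit "atomicrmw add"; the loop itself is produced later by
// AtomicExpandPass:
//   retry:
//     ldrex   r1, [r0]       ; load-exclusive the current value
//     add     r1, r1, r2     ; apply the RMW operation
//     strex   r3, r1, [r0]   ; attempt the conditional store
//     cmp     r3, #0
//     bne     retry          ; reservation lost -> try again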
21404
21405// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21406// bits, and up to 64 bits on the non-M profiles.
21407TargetLoweringBase::AtomicExpansionKind
21408ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
21409 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21410 // implement cmpxchg without spilling. If the address being exchanged is also
21411 // on the stack and close enough to the spill slot, this can lead to a
21412 // situation where the monitor always gets cleared and the atomic operation
21413 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21414 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21415 bool HasAtomicCmpXchg;
21416 if (Subtarget->isMClass())
21417 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21418 else if (Subtarget->isThumb())
21419 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21420 else
21421 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21422 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21423 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21424 return AtomicExpansionKind::LLSC;
21425 return AtomicExpansionKind::None;
21426}
21427
21428bool ARMTargetLowering::shouldInsertFencesForAtomic(
21429 const Instruction *I) const {
21430 return InsertFencesForAtomic;
21431}
21432
21433bool ARMTargetLowering::useLoadStackGuardNode() const {
21434 // ROPI/RWPI are not supported currently.
21435 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21436}
21437
21438void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21439 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21440 return TargetLowering::insertSSPDeclarations(M);
21441
21442 // MSVC CRT has a global variable holding security cookie.
21443 M.getOrInsertGlobal("__security_cookie",
21444 PointerType::getUnqual(M.getContext()));
21445
21446 // MSVC CRT has a function to validate security cookie.
21447 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21448 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21449 PointerType::getUnqual(M.getContext()));
21450 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21451 F->addParamAttr(0, Attribute::AttrKind::InReg);
21452}
21453
21454Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
21455 // MSVC CRT has a global variable holding security cookie.
21456 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21457 return M.getGlobalVariable("__security_cookie");
21458 return TargetLowering::getSDagStackGuard(M);
21459}
21460
21461Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21462 // MSVC CRT has a function to validate security cookie.
21463 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21464 return M.getFunction("__security_check_cookie");
21465 return TargetLowering::getSSPStackGuardCheck(M);
21466}
21467
21468bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21469 unsigned &Cost) const {
21470 // If we do not have NEON, vector types are not natively supported.
21471 if (!Subtarget->hasNEON())
21472 return false;
21473
21474 // Floating point values and vector values map to the same register file.
21475 // Therefore, although we could do a store extract of a vector type, it is
21476 // better to leave those as float, as we have more freedom in the addressing
21477 // mode for them.
21478 if (VectorTy->isFPOrFPVectorTy())
21479 return false;
21480
21481 // If the index is unknown at compile time, this is very expensive to lower
21482 // and it is not possible to combine the store with the extract.
21483 if (!isa<ConstantInt>(Idx))
21484 return false;
21485
21486 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21487 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21488 // We can do a store + vector extract on any vector that fits perfectly in a D
21489 // or Q register.
21490 if (BitWidth == 64 || BitWidth == 128) {
21491 Cost = 0;
21492 return true;
21493 }
21494 return false;
21495}
21496
21497bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21498 return Subtarget->hasV6T2Ops();
21499}
21500
21501bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21502 return Subtarget->hasV6T2Ops();
21503}
21504
21505bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
21506 const Instruction &AndI) const {
21507 if (!Subtarget->hasV7Ops())
21508 return false;
21509
21510 // Sink the `and` instruction only if the mask would fit into a modified
21511 // immediate operand.
21512 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21513 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21514 return false;
21515 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21516 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21517 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21518}
21519
21520TargetLowering::ShiftLegalizationStrategy
21521ARMTargetLowering::preferredShiftLegalizationStrategy(
21522 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21523 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21524 return ShiftLegalizationStrategy::LowerToLibcall;
21525 return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
21526 ExpansionFactor);
21527}
21528
21529Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21530 Value *Addr,
21531 AtomicOrdering Ord) const {
21532 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21533 bool IsAcquire = isAcquireOrStronger(Ord);
21534
21535 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21536 // intrinsic must return {i32, i32} and we have to recombine them into a
21537 // single i64 here.
21538 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21539 Intrinsic::ID Int =
21540 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21541 Function *Ldrex = Intrinsic::getDeclaration(M, Int);
21542
21543 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21544
21545 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21546 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21547 if (!Subtarget->isLittle())
21548 std::swap (Lo, Hi);
21549 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21550 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21551 return Builder.CreateOr(
21552 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21553 }
21554
21555 Type *Tys[] = { Addr->getType() };
21556 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21557 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21558 CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21559
21560 CI->addParamAttr(
21561 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21562 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21563}
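// Illustration (editorial sketch) of the IR built above for the 64-bit case
// on a little-endian target; value names are illustrative:
//   %lohi  = call { i32, i32 } @llvm.arm.ldrexd(ptr %addr)
//   %lo    = extractvalue { i32, i32 } %lohi, 0
//   %hi    = extractvalue { i32, i32 } %lohi, 1
//   %lo64  = zext i32 %lo to i64
//   %hi64  = zext i32 %hi to i64
//   %shl   = shl i64 %hi64, 32
//   %val64 = or i64 %lo64, %shl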
21564
21565void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21566 IRBuilderBase &Builder) const {
21567 if (!Subtarget->hasV7Ops())
21568 return;
21569 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21570 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21571}
21572
21573Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21574 Value *Val, Value *Addr,
21575 AtomicOrdering Ord) const {
21576 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21577 bool IsRelease = isReleaseOrStronger(Ord);
21578
21579 // Since the intrinsics must have legal type, the i64 intrinsics take two
21580 // parameters: "i32, i32". We must marshal Val into the appropriate form
21581 // before the call.
21582 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21583 Intrinsic::ID Int =
21584 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21585 Function *Strex = Intrinsic::getDeclaration(M, Int);
21586 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21587
21588 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21589 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21590 if (!Subtarget->isLittle())
21591 std::swap(Lo, Hi);
21592 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21593 }
21594
21595 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21596 Type *Tys[] = { Addr->getType() };
21597 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21598
21599 CallInst *CI = Builder.CreateCall(
21600 Strex, {Builder.CreateZExtOrBitCast(
21601 Val, Strex->getFunctionType()->getParamType(0)),
21602 Addr});
21603 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21604 Val->getType()));
21605 return CI;
21606}
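// Illustration (editorial sketch) of the matching 64-bit store-conditional
// built above (little-endian, names illustrative):
//   %lo      = trunc i64 %val to i32
//   %shifted = lshr i64 %val, 32
//   %hi      = trunc i64 %shifted to i32
//   %failed  = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, ptr %addr)
// A non-zero result means the exclusive monitor was lost and the caller's
// LL/SC loop must retry.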
21607
21608
21609bool ARMTargetLowering::alignLoopsWithOptSize() const {
21610 return Subtarget->isMClass();
21611}
21612
21613/// A helper function for determining the number of interleaved accesses we
21614/// will generate when lowering accesses of the given type.
21615unsigned
21616ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21617 const DataLayout &DL) const {
21618 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21619}
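// Editorial example: a <16 x i32> group is 512 bits, so (512 + 127) / 128 = 4
// interleaved accesses are needed, while a 64-bit <8 x i8> vector rounds up
// to a single access.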
21620
21621bool ARMTargetLowering::isLegalInterleavedAccessType(
21622 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21623 const DataLayout &DL) const {
21624
21625 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21626 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21627
21628 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21629 return false;
21630
21631 // Ensure the vector doesn't have f16 elements. Even though we could do an
21632 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21633 // f32.
21634 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21635 return false;
21636 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21637 return false;
21638
21639 // Ensure the number of vector elements is greater than 1.
21640 if (VecTy->getNumElements() < 2)
21641 return false;
21642
21643 // Ensure the element type is legal.
21644 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21645 return false;
21646 // And ensure the alignment is high enough under MVE.
21647 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21648 return false;
21649
21650 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21651 // 128 will be split into multiple interleaved accesses.
21652 if (Subtarget->hasNEON() && VecSize == 64)
21653 return true;
21654 return VecSize % 128 == 0;
21655}
21656
21657unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
21658 if (Subtarget->hasNEON())
21659 return 4;
21660 if (Subtarget->hasMVEIntegerOps())
21661 return MVEMaxSupportedInterleaveFactor;
21662 return TargetLoweringBase::getMaxSupportedInterleaveFactor();
21663}
21664
21665/// Lower an interleaved load into a vldN intrinsic.
21666///
21667/// E.g. Lower an interleaved load (Factor = 2):
21668/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21669/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21670/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21671///
21672/// Into:
21673/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21674/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21675/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21676bool ARMTargetLowering::lowerInterleavedLoad(
21677 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21678 ArrayRef<unsigned> Indices, unsigned Factor) const {
21679 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21680 "Invalid interleave factor");
21681 assert(!Shuffles.empty() && "Empty shufflevector input");
21682 assert(Shuffles.size() == Indices.size() &&
21683 "Unmatched number of shufflevectors and indices");
21684
21685 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21686 Type *EltTy = VecTy->getElementType();
21687
21688 const DataLayout &DL = LI->getDataLayout();
21689 Align Alignment = LI->getAlign();
21690
21691 // Skip if we do not have NEON and skip illegal vector types. We can
21692 // "legalize" wide vector types into multiple interleaved accesses as long as
21693 // the vector types are divisible by 128.
21694 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21695 return false;
21696
21697 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21698
21699 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21700 // load integer vectors first and then convert to pointer vectors.
21701 if (EltTy->isPointerTy())
21702 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21703
21704 IRBuilder<> Builder(LI);
21705
21706 // The base address of the load.
21707 Value *BaseAddr = LI->getPointerOperand();
21708
21709 if (NumLoads > 1) {
21710 // If we're going to generate more than one load, reset the sub-vector type
21711 // to something legal.
21712 VecTy = FixedVectorType::get(VecTy->getElementType(),
21713 VecTy->getNumElements() / NumLoads);
21714 }
21715
21716 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21717
21718 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21719 if (Subtarget->hasNEON()) {
21720 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21721 Type *Tys[] = {VecTy, PtrTy};
21722 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21723 Intrinsic::arm_neon_vld3,
21724 Intrinsic::arm_neon_vld4};
21725 Function *VldnFunc =
21726 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21727
21728 SmallVector<Value *, 2> Ops;
21729 Ops.push_back(BaseAddr);
21730 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21731
21732 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21733 } else {
21734 assert((Factor == 2 || Factor == 4) &&
21735 "expected interleave factor of 2 or 4 for MVE");
21736 Intrinsic::ID LoadInts =
21737 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21738 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21739 Type *Tys[] = {VecTy, PtrTy};
21740 Function *VldnFunc =
21741 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21742
21743 SmallVector<Value *, 2> Ops;
21744 Ops.push_back(BaseAddr);
21745 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21746 }
21747 };
21748
21749 // Holds sub-vectors extracted from the load intrinsic return values. The
21750 // sub-vectors are associated with the shufflevector instructions they will
21751 // replace.
21752 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21753
21754 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21755 // If we're generating more than one load, compute the base address of
21756 // subsequent loads as an offset from the previous.
21757 if (LoadCount > 0)
21758 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21759 VecTy->getNumElements() * Factor);
21760
21761 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21762
21763 // Replace uses of each shufflevector with the corresponding vector loaded
21764 // by ldN.
21765 for (unsigned i = 0; i < Shuffles.size(); i++) {
21766 ShuffleVectorInst *SV = Shuffles[i];
21767 unsigned Index = Indices[i];
21768
21769 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21770
21771 // Convert the integer vector to pointer vector if the element is pointer.
21772 if (EltTy->isPointerTy())
21773 SubVec = Builder.CreateIntToPtr(
21774 SubVec,
21775 FixedVectorType::get(SV->getType()->getElementType(),
21776 VecTy->getNumElements()));
21777 SubVecs[SV].push_back(SubVec);
21778 }
21779 }
21780
21781 // Replace uses of the shufflevector instructions with the sub-vectors
21782 // returned by the load intrinsic. If a shufflevector instruction is
21783 // associated with more than one sub-vector, those sub-vectors will be
21784 // concatenated into a single wide vector.
21785 for (ShuffleVectorInst *SVI : Shuffles) {
21786 auto &SubVec = SubVecs[SVI];
21787 auto *WideVec =
21788 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21789 SVI->replaceAllUsesWith(WideVec);
21790 }
21791
21792 return true;
21793}
21794
21795/// Lower an interleaved store into a vstN intrinsic.
21796///
21797/// E.g. Lower an interleaved store (Factor = 3):
21798/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21799/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21800/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21801///
21802/// Into:
21803/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21804/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21805/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21806/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21807///
21808/// Note that the new shufflevectors will be removed and we'll only generate one
21809/// vst3 instruction in CodeGen.
21810///
21811/// Example for a more general valid mask (Factor 3). Lower:
21812/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21813/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21814/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21815///
21816/// Into:
21817/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21818/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21819/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21820/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21821bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
21822 ShuffleVectorInst *SVI,
21823 unsigned Factor) const {
21824 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21825 "Invalid interleave factor");
21826
21827 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21828 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21829
21830 unsigned LaneLen = VecTy->getNumElements() / Factor;
21831 Type *EltTy = VecTy->getElementType();
21832 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21833
21834 const DataLayout &DL = SI->getDataLayout();
21835 Align Alignment = SI->getAlign();
21836
21837 // Skip if we do not have NEON and skip illegal vector types. We can
21838 // "legalize" wide vector types into multiple interleaved accesses as long as
21839 // the vector types are divisible by 128.
21840 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21841 return false;
21842
21843 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21844
21845 Value *Op0 = SVI->getOperand(0);
21846 Value *Op1 = SVI->getOperand(1);
21847 IRBuilder<> Builder(SI);
21848
21849 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21850 // vectors to integer vectors.
21851 if (EltTy->isPointerTy()) {
21852 Type *IntTy = DL.getIntPtrType(EltTy);
21853
21854 // Convert to the corresponding integer vector.
21855 auto *IntVecTy =
21856 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21857 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21858 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21859
21860 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21861 }
21862
21863 // The base address of the store.
21864 Value *BaseAddr = SI->getPointerOperand();
21865
21866 if (NumStores > 1) {
21867 // If we're going to generate more than one store, reset the lane length
21868 // and sub-vector type to something legal.
21869 LaneLen /= NumStores;
21870 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21871 }
21872
21873 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21874
21875 auto Mask = SVI->getShuffleMask();
21876
21877 auto createStoreIntrinsic = [&](Value *BaseAddr,
21878 SmallVectorImpl<Value *> &Shuffles) {
21879 if (Subtarget->hasNEON()) {
21880 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21881 Intrinsic::arm_neon_vst3,
21882 Intrinsic::arm_neon_vst4};
21883 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21884 Type *Tys[] = {PtrTy, SubVecTy};
21885
21886 Function *VstNFunc = Intrinsic::getDeclaration(
21887 SI->getModule(), StoreInts[Factor - 2], Tys);
21888
21889 SmallVector<Value *, 6> Ops;
21890 Ops.push_back(BaseAddr);
21891 append_range(Ops, Shuffles);
21892 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21893 Builder.CreateCall(VstNFunc, Ops);
21894 } else {
21895 assert((Factor == 2 || Factor == 4) &&
21896 "expected interleave factor of 2 or 4 for MVE");
21897 Intrinsic::ID StoreInts =
21898 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21899 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21900 Type *Tys[] = {PtrTy, SubVecTy};
21901 Function *VstNFunc =
21902 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21903
21904 SmallVector<Value *, 6> Ops;
21905 Ops.push_back(BaseAddr);
21906 append_range(Ops, Shuffles);
21907 for (unsigned F = 0; F < Factor; F++) {
21908 Ops.push_back(Builder.getInt32(F));
21909 Builder.CreateCall(VstNFunc, Ops);
21910 Ops.pop_back();
21911 }
21912 }
21913 };
21914
21915 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21916 // If we're generating more than one store, we compute the base address of
21917 // subsequent stores as an offset from the previous.
21918 if (StoreCount > 0)
21919 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21920 BaseAddr, LaneLen * Factor);
21921
21922 SmallVector<Value *, 4> Shuffles;
21923
21924 // Split the shufflevector operands into sub vectors for the new vstN call.
21925 for (unsigned i = 0; i < Factor; i++) {
21926 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21927 if (Mask[IdxI] >= 0) {
21928 Shuffles.push_back(Builder.CreateShuffleVector(
21929 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21930 } else {
21931 unsigned StartMask = 0;
21932 for (unsigned j = 1; j < LaneLen; j++) {
21933 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21934 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21935 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21936 break;
21937 }
21938 }
21939 // Note: If all elements in a chunk are undefs, StartMask=0!
21940 // Note: Filling undef gaps with random elements is ok, since
21941 // those elements were being written anyway (with undefs).
21942 // In the case of all undefs we're defaulting to using elems from 0
21943 // Note: StartMask cannot be negative, it's checked in
21944 // isReInterleaveMask
21945 Shuffles.push_back(Builder.CreateShuffleVector(
21946 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21947 }
21948 }
21949
21950 createStoreIntrinsic(BaseAddr, Shuffles);
21951 }
21952 return true;
21953}
21954
21955enum HABaseType {
21956 HA_UNKNOWN = 0,
21957 HA_FLOAT,
21958 HA_DOUBLE,
21959 HA_VECT64,
21960 HA_VECT128
21961};
21962
21963static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21964 uint64_t &Members) {
21965 if (auto *ST = dyn_cast<StructType>(Ty)) {
21966 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21967 uint64_t SubMembers = 0;
21968 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21969 return false;
21970 Members += SubMembers;
21971 }
21972 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21973 uint64_t SubMembers = 0;
21974 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21975 return false;
21976 Members += SubMembers * AT->getNumElements();
21977 } else if (Ty->isFloatTy()) {
21978 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21979 return false;
21980 Members = 1;
21981 Base = HA_FLOAT;
21982 } else if (Ty->isDoubleTy()) {
21983 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21984 return false;
21985 Members = 1;
21986 Base = HA_DOUBLE;
21987 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21988 Members = 1;
21989 switch (Base) {
21990 case HA_FLOAT:
21991 case HA_DOUBLE:
21992 return false;
21993 case HA_VECT64:
21994 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21995 case HA_VECT128:
21996 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21997 case HA_UNKNOWN:
21998 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21999 case 64:
22000 Base = HA_VECT64;
22001 return true;
22002 case 128:
22003 Base = HA_VECT128;
22004 return true;
22005 default:
22006 return false;
22007 }
22008 }
22009 }
22010
22011 return (Members > 0 && Members <= 4);
22012}
22013
22014/// Return the correct alignment for the current calling convention.
22015Align ARMTargetLowering::getABIAlignmentForCallingConv(
22016 Type *ArgTy, const DataLayout &DL) const {
22017 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
22018 if (!ArgTy->isVectorTy())
22019 return ABITypeAlign;
22020
22021 // Avoid over-aligning vector parameters. It would require realigning the
22022 // stack and waste space for no real benefit.
22023 return std::min(ABITypeAlign, DL.getStackAlignment());
22024}
22025
22026/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
22027/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
22028/// passing according to AAPCS rules.
22029bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
22030 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
22031 const DataLayout &DL) const {
22032 if (getEffectiveCallingConv(CallConv, isVarArg) !=
22033 CallingConv::ARM_AAPCS_VFP)
22034 return false;
22035
22036 HABaseType Base = HA_UNKNOWN;
22037 uint64_t Members = 0;
22038 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
22039 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
22040
22041 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
22042 return IsHA || IsIntArray;
22043}
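// Illustration (editorial sketch, struct names hypothetical) of aggregates
// that satisfy the rule above:
//   struct HFA { float x, y, z; };       // homogeneous, 3 x float    -> true
//   struct HVA { float32x4_t a, b; };    // homogeneous, 2 x 128-bit  -> true
//   struct Mix { float x; double y; };   // mixed base types          -> false
//   int A[4];                            // [4 x i32] integer array   -> true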
22044
22045Register ARMTargetLowering::getExceptionPointerRegister(
22046 const Constant *PersonalityFn) const {
22047 // Platforms which do not use SjLj EH may return values in these registers
22048 // via the personality function.
22049 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
22050}
22051
22052Register ARMTargetLowering::getExceptionSelectorRegister(
22053 const Constant *PersonalityFn) const {
22054 // Platforms which do not use SjLj EH may return values in these registers
22055 // via the personality function.
22056 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
22057}
22058
22059void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
22060 // Update IsSplitCSR in ARMFunctionInfo.
22061 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
22062 AFI->setIsSplitCSR(true);
22063}
22064
22065void ARMTargetLowering::insertCopiesSplitCSR(
22066 MachineBasicBlock *Entry,
22067 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
22068 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
22069 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
22070 if (!IStart)
22071 return;
22072
22073 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
22074 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
22075 MachineBasicBlock::iterator MBBI = Entry->begin();
22076 for (const MCPhysReg *I = IStart; *I; ++I) {
22077 const TargetRegisterClass *RC = nullptr;
22078 if (ARM::GPRRegClass.contains(*I))
22079 RC = &ARM::GPRRegClass;
22080 else if (ARM::DPRRegClass.contains(*I))
22081 RC = &ARM::DPRRegClass;
22082 else
22083 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
22084
22085 Register NewVR = MRI->createVirtualRegister(RC);
22086 // Create copy from CSR to a virtual register.
22087 // FIXME: this currently does not emit CFI pseudo-instructions, it works
22088 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22089 // nounwind. If we want to generalize this later, we may need to emit
22090 // CFI pseudo-instructions.
22091 assert(Entry->getParent()->getFunction().hasFnAttribute(
22092 Attribute::NoUnwind) &&
22093 "Function should be nounwind in insertCopiesSplitCSR!");
22094 Entry->addLiveIn(*I);
22095 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22096 .addReg(*I);
22097
22098 // Insert the copy-back instructions right before the terminator.
22099 for (auto *Exit : Exits)
22100 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22101 TII->get(TargetOpcode::COPY), *I)
22102 .addReg(NewVR);
22103 }
22104}
22105
22109}
22110
22112 return Subtarget->hasMVEIntegerOps();
22113}
22114
22117 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22118 if (!VTy)
22119 return false;
22120
22121 auto *ScalarTy = VTy->getScalarType();
22122 unsigned NumElements = VTy->getNumElements();
22123
22124 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22125 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22126 return false;
22127
22128 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22129 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22130 return Subtarget->hasMVEFloatOps();
22131
22132 if (Operation != ComplexDeinterleavingOperation::CAdd)
22133 return false;
22134
22135 return Subtarget->hasMVEIntegerOps() &&
22136 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22137 ScalarTy->isIntegerTy(32));
22138}
22139
22140Value *ARMTargetLowering::createComplexDeinterleavingIR(
22141 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
22142 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22143 Value *Accumulator) const {
22144
22145 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22146
22147 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22148
22149 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22150
22151 if (TyWidth > 128) {
22152 int Stride = Ty->getNumElements() / 2;
22153 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22154 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22155 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22156 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22157
22158 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22159 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22160 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22161 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22162 Value *LowerSplitAcc = nullptr;
22163 Value *UpperSplitAcc = nullptr;
22164
22165 if (Accumulator) {
22166 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22167 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22168 }
22169
22170 auto *LowerSplitInt = createComplexDeinterleavingIR(
22171 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22172 auto *UpperSplitInt = createComplexDeinterleavingIR(
22173 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22174
22175 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22176 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22177 }
22178
22179 auto *IntTy = Type::getInt32Ty(B.getContext());
22180
22181 ConstantInt *ConstRotation = nullptr;
22182 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22183 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22184
22185 if (Accumulator)
22186 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22187 {ConstRotation, Accumulator, InputB, InputA});
22188 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22189 {ConstRotation, InputB, InputA});
22190 }
22191
22192 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22193 // 1 means the value is not halved.
22194 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22195
22196 if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
22197 ConstRotation = ConstantInt::get(IntTy, 0);
22198 else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22199 ConstRotation = ConstantInt::get(IntTy, 1);
22200
22201 if (!ConstRotation)
22202 return nullptr; // Invalid rotation for arm_mve_vcaddq
22203
22204 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22205 {ConstHalving, ConstRotation, InputA, InputB});
22206 }
22207
22208 return nullptr;
22209}
unsigned const MachineRegisterInfo * MRI
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static const MCPhysReg GPRArgRegs[]
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
#define MAKE_CASE(V)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static bool isConstant(const MachineInstr &MI)
static const LLT S1
static const LLT F64
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
static bool isStore(int Opcode)
static bool isThumb(const MCSubtargetInfo &STI)
static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
@ HA_DOUBLE
@ HA_VECT128
@ HA_VECT64
@ HA_FLOAT
@ HA_UNKNOWN
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static const APInt * isPowerOf2Constant(SDValue V)
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static bool isValidMVECond(unsigned CC, bool IsFloat)
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isGTorGE(ISD::CondCode CC)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static bool isReverseMask(ArrayRef< int > M, EVT VT)
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations...
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool CanInvertMVEVCMP(SDValue N)
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static EVT getVectorTyFromPredicateVector(EVT VT)
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg, SelectionDAG &DAG, const SDLoc &DL)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
static bool isLTorLE(ISD::CondCode CC)
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static const MCPhysReg GPRArgRegs[]
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
ShuffleOpCodes
@ OP_VEXT3
@ OP_VTRNR
@ OP_VDUP1
@ OP_VZIPR
@ OP_VUZPR
@ OP_VREV
@ OP_VZIPL
@ OP_VTRNL
@ OP_COPY
@ OP_VEXT1
@ OP_VDUP0
@ OP_VEXT2
@ OP_VUZPL
@ OP_VDUP3
@ OP_VDUP2
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
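A simplified sketch of how such a zero vector is typically materialized (assuming a 64- or 128-bit vector type and the file's existing includes): the zero is encoded as a VMOV immediate and bitcast to the requested type.

// Sketch only, not the in-tree implementation.
static SDValue buildZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
  SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}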
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isZeroVector(SDValue N)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
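The transformation itself is plain distribution; a stripped-down sketch of the rewrite (ignoring the subtarget and profitability checks the real combine performs) could be:

// Sketch only: rewrite (A + B) * C into (A * C) + (B * C).
static SDValue distributeMulOverAdd(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  if (N0.getOpcode() != ISD::ADD)
    return SDValue();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue AC = DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), N1);
  SDValue BC = DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(1), N1);
  return DAG.getNode(ISD::ADD, DL, VT, AC, BC);
}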
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
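For reference, the canonical two-result masks on a 4-element vector (indices 0-3 select from the first source, 4-7 from the second) are:

// VTRN transposes 2x2 element blocks, VZIP interleaves the two sources,
// VUZP de-interleaves even and odd lanes.
static const int VTRNMasks[2][4] = {{0, 4, 2, 6}, {1, 5, 3, 7}};
static const int VZIPMasks[2][4] = {{0, 4, 1, 5}, {2, 6, 3, 7}};
static const int VUZPMasks[2][4] = {{0, 2, 4, 6}, {1, 3, 5, 7}};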
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
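A sketch of the direct-constant case only; the in-tree helper additionally recognizes +0.0 materialized through bitcasts and constant-pool loads.

// Sketch: detect a literal +0.0 floating-point constant node.
static bool isPositiveFPZero(SDValue Op) {
  if (const auto *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  return false;
}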
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.

This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Module.h This file contains the declarations for the Module class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1393
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1235
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1629
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1181
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1091
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned logBase2() const
Definition: APInt.h:1719
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:455
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1615
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:312
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:321
bool hasARMOps() const
Definition: ARMSubtarget.h:265
bool supportsTailCall() const
Definition: ARMSubtarget.h:399
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:298
bool hasVFP4Base() const
Definition: ARMSubtarget.h:273
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
bool useFPVFMx() const
Definition: ARMSubtarget.h:282
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:274
bool isThumb2() const
Definition: ARMSubtarget.h:365
bool isTargetWindows() const
Definition: ARMSubtarget.h:308
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:288
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:200
bool useSjLjEH() const
Definition: ARMSubtarget.h:287
bool isTargetDarwin() const
Definition: ARMSubtarget.h:300
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:208
bool hasVFP2Base() const
Definition: ARMSubtarget.h:271
bool isTargetAndroid() const
Definition: ARMSubtarget.h:350
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:310
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:326
bool hasVFP3Base() const
Definition: ARMSubtarget.h:272
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:286
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:302
bool hasMinSize() const
Definition: ARMSubtarget.h:363
bool isTargetIOS() const
Definition: ARMSubtarget.h:301
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:267
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:433
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:303
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:276
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:304
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:407
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:401
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:331
bool isTargetLinux() const
Definition: ARMSubtarget.h:305
bool useFPVFMx16() const
Definition: ARMSubtarget.h:285
bool isMClass() const
Definition: ARMSubtarget.h:366
unsigned getPrefLoopLogAlignment() const
Definition: ARMSubtarget.h:486
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:280
bool isTargetELF() const
Definition: ARMSubtarget.h:311
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:443
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
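As a rough illustration (TLI, DL and Int32Ty are assumed names), the query for a base register plus an index register scaled by 4, i.e. an [rN, rM, lsl #2] style address, would be phrased roughly like this:

// Describes "base register + (index register << 2)".
TargetLowering::AddrMode AM;
AM.BaseGV = nullptr;   // no global base
AM.BaseOffs = 0;       // no constant offset
AM.HasBaseReg = true;  // base register present
AM.Scale = 4;          // index register scaled by 4
bool Legal = TLI.isLegalAddressingMode(DL, AM, Int32Ty, /*AddrSpace=*/0);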
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode represented by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
bool isFloatingPointOperation() const
Definition: Instructions.h:864
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:94
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
The address of a basic block.
Definition: Constants.h:890
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
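A sketch of the usual calling pattern (BVN is an assumed BuildVectorSDNode pointer), as used before attempting a VMOV-immediate style lowering:

// Sketch: query the splat before trying to encode it as an immediate.
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs) &&
    SplatBitSize <= 64) {
  // SplatBits now holds the repeated element value.
}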
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
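These calling-convention classes are normally used together; a simplified sketch of the formal-argument analysis loop (CallConv, isVarArg, MF, Ins and DAG are assumed from the surrounding lowering code):

// Sketch: classify formal arguments and walk the resulting locations.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
for (CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc()) {
    // Argument arrives in VA.getLocReg(); copy it out of the register.
  } else {
    // Argument was passed on the stack at offset VA.getLocMemOffset().
  }
}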
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1542
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1594
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:706
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getStackAlignment() const
Definition: DataLayout.h:271
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:332
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
unsigned getNumElements() const
Definition: DerivedTypes.h:582
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:207
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
arg_iterator arg_begin()
Definition: Function.h:831
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:679
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:225
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2142
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1884
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2521
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2127
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1442
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1421
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2026
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2499
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2122
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2417
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2158
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
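For orientation, a short hypothetical sequence using a few of the IRBuilder methods listed above, packing two 16-bit values held in i32 registers (Lo, Hi and InsertPt are assumed names):

// Hypothetical example: compute (Hi << 16) | (Lo & 0xFFFF).
IRBuilder<> Builder(InsertPt);
Value *Masked  = Builder.CreateAnd(Lo, Builder.getInt32(0xFFFF));
Value *Shifted = Builder.CreateShl(Hi, Builder.getInt32(16));
Value *Packed  = Builder.CreateOr(Shifted, Masked, "packed");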
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
Value * getPointerOperand()
Definition: Instructions.h:253
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:230
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
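The MachineFrameInfo calls above are how argument and intrinsic lowering reserves stack slots. A small hedged illustration; the sizes, offsets and the reserveSlots name are made up:

using namespace llvm;

static int reserveSlots(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // A 4-byte incoming argument that already lives at SP+8 on entry.
  int FixedFI = MFI.CreateFixedObject(/*Size=*/4, /*SPOffset=*/8,
                                      /*IsImmutable=*/true);

  // A fresh 16-byte, 8-byte-aligned spill slot.
  int SpillFI = MFI.CreateStackObject(/*Size=*/16, Align(8),
                                      /*isSpillSlot=*/true);
  (void)SpillFI;

  MFI.setReturnAddressIsTaken(true); // e.g. when lowering llvm.returnaddress
  return FixedFI;
}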
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
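MachineInstrBuilder is the fluent interface used to emit machine instructions once lowering has left the DAG. A hedged, target-neutral sketch; Opcode, DstReg, SrcReg and FrameIdx are placeholders rather than real ARM operands:

using namespace llvm;

static void emitExampleInstr(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator InsertPt,
                             const DebugLoc &DL, const TargetInstrInfo *TII,
                             unsigned Opcode, Register DstReg, Register SrcReg,
                             int FrameIdx) {
  // DstReg = Opcode SrcReg, #0, fi#FrameIdx   (purely illustrative operands)
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode), DstReg)
      .addReg(SrcReg)
      .addImm(0)
      .addFrameIndex(FrameIdx);
}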
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of the block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
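A MachineMemOperand records what a lowered memory instruction actually touches, and the flags above are OR'd together when one is allocated. A hedged sketch of attaching one to a frame-index access; the helper name and the 32-bit scalar memory type are arbitrary choices:

using namespace llvm;

static MachineMemOperand *frameIndexMMO(MachineFunction &MF, int FI) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  return MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI),
      MachineMemOperand::MOLoad | MachineMemOperand::MOStore,
      LLT::scalar(32), MFI.getObjectAlign(FI));
}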
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if the type of the node is undefined.
void setFlags(SDNodeFlags NewFlags)
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
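The SDNode/SDValue accessors above are how DAG combines inspect operands and use lists before rewriting anything. A hedged example of a typical profitability guard; the predicate name and the add-feeding-a-store pattern are only illustrative:

using namespace llvm;

// True for an (add x, C) whose result has exactly one user, and that user is
// a store -- the kind of cheap check a combine performs before folding.
static bool isSingleUseAddFeedingStore(SDValue V) {
  if (V.getOpcode() != ISD::ADD || !V.hasOneUse())
    return false;
  SDNode *UserN = *V.getNode()->use_begin();
  return UserN->getOpcode() == ISD::STORE &&
         isa<ConstantSDNode>(V.getOperand(1));
}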
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:734
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:488
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:744
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:840
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:673
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:785
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:688
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:811
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
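SelectionDAG is the node factory that every Lower*/PerformDAGCombine helper in this file goes through. A minimal hedged sketch of creating replacement nodes; VT, dl and the operands are assumed to come from the node being lowered, and the select-of-increment shape is arbitrary:

using namespace llvm;

// Build select(setcc(x, 0, eq), y, x + 1), exercising getConstant, getSetCC,
// getNode and getSelect.
static SDValue buildExample(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
                            SDValue X, SDValue Y) {
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Cond = DAG.getSetCC(dl, CCVT, X, Zero, ISD::SETEQ);
  SDValue Inc = DAG.getNode(ISD::ADD, dl, VT, X, DAG.getConstant(1, dl, VT));
  return DAG.getSelect(dl, VT, Cond, Y, Inc);
}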
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const unsigned char * bytes_end() const
Definition: StringRef.h:118
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
const unsigned char * bytes_begin() const
Definition: StringRef.h:115
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
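StringSwitch gives a compact way to map strings such as register or constraint names onto values. A small generic illustration; the option names and integer codes here are made up:

#include "llvm/ADT/StringSwitch.h"

static int classifyOption(llvm::StringRef Name) {
  // Map a textual option to a small integer code, defaulting to 0.
  return llvm::StringSwitch<int>(Name)
      .Case("none", 0)
      .Case("soft", 1)
      .Case("hard", 2)
      .Default(0);
}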
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
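The protected TargetLoweringBase setters above are normally called from a target's *ISelLowering constructor to describe what is legal. A hedged, target-neutral sketch of that configuration pattern; ExampleTLI is a hypothetical subclass and the register class and legalization choices are placeholders, not ARM's actual settings:

using namespace llvm;

namespace {
struct ExampleTLI : public TargetLowering {
  ExampleTLI(const TargetMachine &TM, const TargetRegisterClass *GPRClass,
             const TargetRegisterInfo *TRI)
      : TargetLowering(TM) {
    addRegisterClass(MVT::i32, GPRClass);

    setOperationAction(ISD::SDIV, MVT::i32, Expand);       // no native divide
    setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);   // lowered by hand
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    setBooleanContents(ZeroOrOneBooleanContent);
    setMinFunctionAlignment(Align(4));
    setSchedulingPreference(Sched::RegPressure);

    // Must run after all register classes have been added.
    computeRegisterProperties(TRI);
  }
};
} // namespace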
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:399
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:500
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:634
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:252
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
Type * getArrayElementType() const
Definition: Type.h:404
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Section Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
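The ARM_AM immediate helpers answer the recurring question of whether a 32-bit constant can be encoded directly in an instruction. A hedged sketch of the usual check before deciding how to materialize a constant; the helper name and the IsThumb2 parameter are illustrative:

#include "MCTargetDesc/ARMAddressingModes.h" // ARM backend-internal header

using namespace llvm;

// True when Imm fits an ARM shifter_operand immediate, or (for Thumb-2) a
// Thumb-2 modified immediate; otherwise it needs movw/movt or a load.
static bool isCheapImmediate(unsigned Imm, bool IsThumb2) {
  if (ARM_AM::getSOImmVal(Imm) != -1)
    return true;
  if (IsThumb2 && ARM_AM::getT2SOImmVal(Imm) != -1)
    return true;
  return false;
}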
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:778
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1167
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1163
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:751
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1039
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1411
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:511
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1310
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:742
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1196
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1312
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1282
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1313
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1043
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1072
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1062
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:811
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:157
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:818
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1396
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1274
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1066
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1410
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:941
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1308
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:931
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1309
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:974
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1451
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:913
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:802
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1088
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1393
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:750
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1262
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1397
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1029
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:786
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:958
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1118
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1311
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1097
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:755
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1278
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1412
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1192
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1405
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:908
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1057
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1034
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:733
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1306
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:808
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1252
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:884
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:770
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1289
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1314
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1006
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1082
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:826
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:916
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:764
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1413
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1304
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1025
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1305
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:864
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1223
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1249
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1394
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1303
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:979
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:897
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:112
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:883
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:814
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1187
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1111
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:791
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1607
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1523
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1574
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1554
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1525
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
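A minimal, hypothetical guard showing how the ISD load predicates above are usually used before folding a load into another node (helper name is illustrative; headers assumed per current LLVM):
#include "llvm/CodeGen/SelectionDAGNodes.h"
// Sketch: accept only plain, unindexed, non-extending loads that are neither
// volatile nor atomic.
static bool isFoldableLoad(const llvm::SDNode *N) {
  if (!llvm::ISD::isNormalLoad(N))
    return false;
  return llvm::cast<llvm::LoadSDNode>(N)->isSimple();
}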
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
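A small sketch of the usual getDeclaration pattern; llvm.fabs is used purely as an example intrinsic and nothing ARM-specific is assumed:
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
// Sketch: get (or create) the fabs declaration overloaded on X's type, then call it.
static llvm::Value *emitFAbs(llvm::IRBuilderBase &B, llvm::Value *X) {
  llvm::Module *M = B.GetInsertBlock()->getModule();
  llvm::Function *FAbs =
      llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::fabs, {X->getType()});
  return B.CreateCall(FAbs, {X});
}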
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
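A short sketch of how these PatternMatch helpers compose; the predicate name is hypothetical and only illustrates the matchers listed above:
#include "llvm/IR/PatternMatch.h"
// Sketch: match "insertelement undef, fneg(X), 0" and capture X.
static bool isFNegIntoLaneZero(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  return match(V, m_InsertElt(m_Undef(), m_FNeg(m_Value(X)), m_ZeroInt()));
}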
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
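A hedged sketch of how the RTLIB selectors above are typically used to pick a libcall; getLibcallName comes from TargetLoweringBase, and the RTLIB declarations are assumed to be available through the TargetLowering headers:
#include "llvm/CodeGen/TargetLowering.h"
// Sketch: find the runtime routine for truncating SrcVT to DstVT, if any.
static const char *fpRoundLibcallName(const llvm::TargetLowering &TLI,
                                      llvm::EVT SrcVT, llvm::EVT DstVT) {
  llvm::RTLIB::Libcall LC = llvm::RTLIB::getFPROUND(SrcVT, DstVT);
  if (LC == llvm::RTLIB::UNKNOWN_LIBCALL)
    return nullptr; // no libcall exists for this type pair
  return TLI.getLibcallName(LC);
}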
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:255
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
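A tiny, self-contained sketch of the range helpers referenced here (enumerate, drop_begin, all_of) on a plain std::vector; the helper name is illustrative only:
#include "llvm/ADT/STLExtras.h"
#include <vector>
// Sketch: skip the first element, then require that each remaining element
// equals its index within the shortened range.
static bool tailIsSequential(const std::vector<int> &Vals) {
  return llvm::all_of(llvm::enumerate(llvm::drop_begin(Vals)),
                      [](const auto &IV) {
                        return IV.value() == int(IV.index());
                      });
}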
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (32 bit version).
Definition: MathExtras.h:267
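Concrete values for the bit-width and mask predicates above (a standalone sketch, not taken from this file):
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
static void bitPredicateExamples() {
  assert(llvm::isUIntN(8, 255) && !llvm::isUIntN(8, 256)); // fits in 8 unsigned bits?
  assert(llvm::countr_one(0x0Fu) == 4);  // four trailing one bits
  assert(llvm::isMask_32(0x00FFu));      // ones start at bit 0
  assert(!llvm::isMask_32(0x00F0u));     // gap below the ones -> not a mask
}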
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1541
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:279
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
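And similarly for the bit-counting helpers (standalone sketch):
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
static void bitCountExamples() {
  assert(llvm::isPowerOf2_32(64) && !llvm::isPowerOf2_32(0));
  assert(llvm::Log2_32(64) == 6);
  assert(llvm::countr_zero(0x40u) == 6);   // trailing zeros
  assert(llvm::countl_zero(0x40u) == 25);  // leading zeros in a 32-bit value
  assert(llvm::isShiftedMask_32(0x0FF0u)); // contiguous ones, shifted up
}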
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1312
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ MVEVMVNModImm
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1921
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
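A hedged sketch of the BuildMI / predOps / condCodeOp idiom used throughout this backend; the surrounding pass context, registers, iterator and debug location are assumed, and ARM::ADDri is only an example opcode:
#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
// Sketch: emit "add DestReg, SrcReg, #4" as an unconditional, non-flag-setting
// instruction by appending the predicate and optional-CPSR operands.
static void emitAddImm(llvm::MachineBasicBlock &MBB,
                       llvm::MachineBasicBlock::iterator MI,
                       const llvm::DebugLoc &DL,
                       const llvm::TargetInstrInfo *TII,
                       llvm::Register DestReg, llvm::Register SrcReg) {
  using namespace llvm;
  BuildMI(MBB, MI, DL, TII->get(ARM::ADDri), DestReg)
      .addReg(SrcReg)
      .addImm(4)
      .add(predOps(ARMCC::AL)) // predicate: always, with a dummy predicate register
      .add(condCodeOp());      // no 'S' bit: the optional CPSR def stays unset
}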
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
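A small standalone sketch of the mask produced by createSequentialMask, the kind of mask used when concatenating or extracting fixed-width subvectors:
#include "llvm/Analysis/VectorUtils.h"
#include <cassert>
static void sequentialMaskExample() {
  // Elements 2..5 followed by two undef lanes: {2, 3, 4, 5, -1, -1}.
  llvm::SmallVector<int, 16> Mask = llvm::createSequentialMask(2, 4, 2);
  assert(Mask.size() == 6 && Mask[0] == 2 && Mask[3] == 5 && Mask[4] == -1);
}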
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
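Two hypothetical helpers sketching how the SDValue constant predicates above are usually combined (the function names are illustrative, not APIs from this file; headers assumed per current LLVM):
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <optional>
// Sketch: classify an operand as one of the three "special" immediates.
static bool isZeroOneOrAllOnes(llvm::SDValue V) {
  return llvm::isNullConstant(V) || llvm::isOneConstant(V) ||
         llvm::isAllOnesConstant(V);
}
// Sketch: pull the splatted value out of a scalar constant or a constant
// splat BUILD_VECTOR, if there is one.
static std::optional<llvm::APInt> getSplatImmediate(llvm::SDValue V) {
  if (llvm::ConstantSDNode *C = llvm::isConstOrConstSplat(V))
    return C->getAPIntValue();
  return std::nullopt;
}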
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access, used by the alias-analysis infrastructure.
Definition: Metadata.h:760
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:254
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
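Concrete values for the alignment helpers above (standalone sketch):
#include "llvm/Support/Alignment.h"
#include <cassert>
static void alignmentExamples() {
  llvm::Align A(8);
  assert(A.value() == 8);
  assert(llvm::alignTo(10, A) == 16); // round 10 up to the next multiple of 8
  // Alignment still guaranteed after adding an offset of 4 to a 16-byte
  // aligned pointer:
  assert(llvm::commonAlignment(llvm::Align(16), 4) == llvm::Align(4));
}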
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the vector's number of elements is a power of 2.
Definition: ValueTypes.h:455
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:274
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:198
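A standalone sketch of the EVT queries above, for a fixed 4 x i32 vector (only an LLVMContext is assumed):
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
static void evtExamples(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4);
  assert(VT.isVector() && VT.is128BitVector());
  assert(VT.getVectorNumElements() == 4 && VT.getScalarSizeInBits() == 32);
  assert(VT.getVectorElementType() == MVT::i32);
  EVT HalfVT = VT.getHalfNumVectorElementsVT(Ctx); // 2 x i32
  assert(HalfVT.getVectorNumElements() == 2);
}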
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:290
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:51
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:797
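A standalone sketch of combining KnownBits facts the way computeKnownBitsForTargetNode-style code does, on fully known 8-bit inputs:
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>
static void knownBitsExamples() {
  using namespace llvm;
  KnownBits LHS = KnownBits::makeConstant(APInt(8, 0x0F));
  KnownBits RHS = KnownBits::makeConstant(APInt(8, 0x01));
  // Both inputs are fully known, so the sum 0x0F + 0x01 = 0x10 is too.
  KnownBits Sum = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                              /*NUW=*/false, LHS, RHS);
  assert(Sum.isConstant() && Sum.getConstant() == 0x10);
  assert(LHS.zext(16).getBitWidth() == 16);    // widen; new high bits known zero
  assert(!LHS.intersectWith(RHS).isUnknown()); // bits both sides agree on
}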
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
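A hedged fragment (assumed to live inside custom lowering code in this file, with DAG, dl, Chain, Val and a frame index FI in scope) showing the usual way a stack access is tagged with one of these MachinePointerInfo factories:
// Sketch: store Val to the stack slot FI, telling alias analysis which
// fixed-stack object the access touches.
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue Store =
    DAG.getStore(Chain, dl, Val, FIPtr,
                 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));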
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
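A hedged fragment (assumed context: target lowering code with DAG, dl, Chain, Callee, RetTy and a populated ArgListTy Args) showing the builder chain these setters form when emitting a call through LowerCallTo:
// Sketch: describe the call, then let the generic call lowering expand it.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
    .setDiscardResult(false);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// CallResult.first is the returned value, CallResult.second the output chain.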
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)