ARMISelLowering.cpp
1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
87#include "llvm/IR/Type.h"
88#include "llvm/IR/User.h"
89#include "llvm/IR/Value.h"
90#include "llvm/MC/MCInstrDesc.h"
93#include "llvm/MC/MCSchedule.h"
100#include "llvm/Support/Debug.h"
108#include <algorithm>
109#include <cassert>
110#include <cstdint>
111#include <cstdlib>
112#include <iterator>
113#include <limits>
114#include <optional>
115#include <tuple>
116#include <utility>
117#include <vector>
118
119using namespace llvm;
120using namespace llvm::PatternMatch;
121
122#define DEBUG_TYPE "arm-isel"
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127STATISTIC(NumConstpoolPromoted,
128 "Number of constants with their storage promoted into constant pools");
129
130static cl::opt<bool>
131ARMInterworking("arm-interworking", cl::Hidden,
132 cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 cl::init(true));
134
136 "arm-promote-constant", cl::Hidden,
137 cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 "constant pools"),
139 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
141 "arm-promote-constant-max-size", cl::Hidden,
142 cl::desc("Maximum size of constant to promote into a constant pool"),
143 cl::init(64));
145 "arm-promote-constant-max-total", cl::Hidden,
146 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 cl::init(128));
148
150MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 cl::init(2));
153
154// The APCS parameter registers.
155static const MCPhysReg GPRArgRegs[] = {
156 ARM::R0, ARM::R1, ARM::R2, ARM::R3
157};
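// Under both APCS and AAPCS, the first four words of integer arguments travel
// in r0-r3 and the rest go on the stack; the byval and variadic-argument
// lowering later in this file draws from this register set.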
158
160 SelectionDAG &DAG, const SDLoc &DL) {
162 assert(Arg.ArgVT.bitsLT(MVT::i32));
163 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
164 SDValue Ext =
166 MVT::i32, Trunc);
167 return Ext;
168}
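// handleCMSEValue re-extends a narrow (sub-i32) value on the secure side: on
// CMSE non-secure calls the callee cannot be trusted to have performed the
// sign/zero extension the ABI mandates, so the truncate + extend pair above
// is emitted by the caller (see its use in LowerCallResult below).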
169
170void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
171 if (VT != PromotedLdStVT) {
173 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
174
176 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
177 }
178
179 MVT ElemTy = VT.getVectorElementType();
180 if (ElemTy != MVT::f64)
184 if (ElemTy == MVT::i32) {
189 } else {
194 }
203 if (VT.isInteger()) {
207 }
208
209 // Neon does not support vector divide/remainder operations.
218
219 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
220 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
222 setOperationAction(Opcode, VT, Legal);
223 if (!VT.isFloatingPoint())
224 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
225 setOperationAction(Opcode, VT, Legal);
226}
227
228void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
229 addRegisterClass(VT, &ARM::DPRRegClass);
230 addTypeForNEON(VT, MVT::f64);
231}
232
233void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
234 addRegisterClass(VT, &ARM::DPairRegClass);
235 addTypeForNEON(VT, MVT::v2f64);
236}
237
238void ARMTargetLowering::setAllExpand(MVT VT) {
239 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
240 setOperationAction(Opc, VT, Expand);
241
242 // We support these really simple operations even on types where all
243 // the actual arithmetic has to be broken down into simpler
244 // operations or turned into library calls.
249}
250
251void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
252 LegalizeAction Action) {
253 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
254 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
255 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
256}
257
258void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
259 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
260
261 for (auto VT : IntTypes) {
262 addRegisterClass(VT, &ARM::MQPRRegClass);
292
293 // No native support for these.
303
304 // Vector reductions
314
315 if (!HasMVEFP) {
320 } else {
323 }
324
325 // Pre and Post inc are supported on loads and stores
326 for (unsigned im = (unsigned)ISD::PRE_INC;
332 }
333 }
334
335 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
336 for (auto VT : FloatTypes) {
337 addRegisterClass(VT, &ARM::MQPRRegClass);
338 if (!HasMVEFP)
339 setAllExpand(VT);
340
341 // These are legal or custom whether we have MVE.fp or not
354
355 // Pre and Post inc are supported on loads and stores
356 for (unsigned im = (unsigned)ISD::PRE_INC;
362 }
363
364 if (HasMVEFP) {
372
373 // No native support for these.
388 }
389 }
390
391 // Custom-expand vector reductions that are smaller than legal, to prevent
392 // false zero items being added.
401
402 // We 'support' these types up to bitcast/load/store level, regardless of
403 // MVE integer-only / float support; only FP data processing on the FP
404 // vector types is inhibited at the integer-only level.
405 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
406 for (auto VT : LongTypes) {
407 addRegisterClass(VT, &ARM::MQPRRegClass);
408 setAllExpand(VT);
414 }
416
417 // We can do bitwise operations on v2i64 vectors
418 setOperationAction(ISD::AND, MVT::v2i64, Legal);
419 setOperationAction(ISD::OR, MVT::v2i64, Legal);
420 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
421
422 // It is legal to extload from v8i8 to v8i16, and from v4i8/v4i16 to v4i32.
423 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
424 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
425 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
426
427 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
433
434 // Some truncating stores are legal too.
435 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
436 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
437 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
438
439 // Pre and Post inc on these are legal, given the correct extends
440 for (unsigned im = (unsigned)ISD::PRE_INC;
442 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
447 }
448 }
449
450 // Predicate types
451 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
452 for (auto VT : pTypes) {
453 addRegisterClass(VT, &ARM::VCCRRegClass);
468
469 if (!HasMVEFP) {
474 }
475 }
479 setOperationAction(ISD::OR, MVT::v2i1, Expand);
485
494}
495
497 const ARMSubtarget &STI)
498 : TargetLowering(TM), Subtarget(&STI) {
499 RegInfo = Subtarget->getRegisterInfo();
500 Itins = Subtarget->getInstrItineraryData();
501
504
505 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
506 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
507 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
508 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
509 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
510 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
512 }
513
514 if (Subtarget->isTargetMachO()) {
515 // Uses VFP for Thumb libfuncs if available.
516 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
517 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
518 static const struct {
519 const RTLIB::Libcall Op;
520 const char * const Name;
521 const ISD::CondCode Cond;
522 } LibraryCalls[] = {
523 // Single-precision floating-point arithmetic.
524 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
525 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
526 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
527 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
528
529 // Double-precision floating-point arithmetic.
530 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
531 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
532 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
533 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
534
535 // Single-precision comparisons.
536 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
537 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
538 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
539 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
540 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
541 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
542 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
543
544 // Double-precision comparisons.
545 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
546 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
547 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
548 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
549 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
550 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
551 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
552
553 // Floating-point to integer conversions.
554 // i64 conversions are done via library routines even when generating VFP
555 // instructions, so use the same ones.
556 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
557 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
558 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
559 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
560
561 // Conversions between floating types.
562 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
563 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
564
565 // Integer to floating-point conversions.
566 // i64 conversions are done via library routines even when generating VFP
567 // instructions, so use the same ones.
568 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
569 // e.g., __floatunsidf vs. __floatunssidfvfp.
570 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
571 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
572 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
573 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
574 };
575
576 for (const auto &LC : LibraryCalls) {
577 setLibcallName(LC.Op, LC.Name);
578 if (LC.Cond != ISD::SETCC_INVALID)
579 setCmpLibcallCC(LC.Op, LC.Cond);
580 }
581 }
582 }
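// With the table above in place, float operations that still end up as
// libcalls on these MachO Thumb+VFP targets use the *vfp helper variants
// (e.g. ADD_F32 calls __addsf3vfp rather than the generic __addsf3), and each
// comparison helper's boolean result is tested with the condition given in
// the third column.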
583
584 // RTLIB
585 if (Subtarget->isAAPCS_ABI() &&
586 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
587 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
588 static const struct {
589 const RTLIB::Libcall Op;
590 const char * const Name;
591 const CallingConv::ID CC;
592 const ISD::CondCode Cond;
593 } LibraryCalls[] = {
594 // Double-precision floating-point arithmetic helper functions
595 // RTABI chapter 4.1.2, Table 2
596 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600
601 // Double-precision floating-point comparison helper functions
602 // RTABI chapter 4.1.2, Table 3
603 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
604 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
605 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
606 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
607 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
608 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
609 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
610
611 // Single-precision floating-point arithmetic helper functions
612 // RTABI chapter 4.1.2, Table 4
613 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617
618 // Single-precision floating-point comparison helper functions
619 // RTABI chapter 4.1.2, Table 5
620 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
621 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
622 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
623 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
624 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
625 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
626 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
627
628 // Floating-point to integer conversions.
629 // RTABI chapter 4.1.2, Table 6
630 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638
639 // Conversions between floating types.
640 // RTABI chapter 4.1.2, Table 7
641 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
642 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644
645 // Integer to floating-point conversions.
646 // RTABI chapter 4.1.2, Table 8
647 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655
656 // Long long helper functions
657 // RTABI chapter 4.2, Table 9
658 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662
663 // Integer division functions
664 // RTABI chapter 4.3.1
665 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
670 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
671 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
672 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
673 };
674
675 for (const auto &LC : LibraryCalls) {
676 setLibcallName(LC.Op, LC.Name);
677 setLibcallCallingConv(LC.Op, LC.CC);
678 if (LC.Cond != ISD::SETCC_INVALID)
679 setCmpLibcallCC(LC.Op, LC.Cond);
680 }
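// Note how the RTABI comparison helpers are wired up: __aeabi_dcmpeq serves
// both OEQ_F64 (result tested with SETNE, i.e. "helper returned non-zero")
// and UNE_F64 (tested with SETEQ), since the helper itself only reports
// equal / not-equal; the same pattern is used for the f32 helpers.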
681
682 // EABI dependent RTLIB
683 if (TM.Options.EABIVersion == EABI::EABI4 ||
684 TM.Options.EABIVersion == EABI::EABI5) {
685 static const struct {
686 const RTLIB::Libcall Op;
687 const char *const Name;
688 const CallingConv::ID CC;
689 const ISD::CondCode Cond;
690 } MemOpsLibraryCalls[] = {
691 // Memory operations
692 // RTABI chapter 4.3.4
693 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
694 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
695 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
696 };
697
698 for (const auto &LC : MemOpsLibraryCalls) {
699 setLibcallName(LC.Op, LC.Name);
700 setLibcallCallingConv(LC.Op, LC.CC);
701 if (LC.Cond != ISD::SETCC_INVALID)
702 setCmpLibcallCC(LC.Op, LC.Cond);
703 }
704 }
705 }
706
707 if (Subtarget->isTargetWindows()) {
708 static const struct {
709 const RTLIB::Libcall Op;
710 const char * const Name;
711 const CallingConv::ID CC;
712 } LibraryCalls[] = {
713 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
717 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
718 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
719 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
720 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
721 };
722
723 for (const auto &LC : LibraryCalls) {
724 setLibcallName(LC.Op, LC.Name);
725 setLibcallCallingConv(LC.Op, LC.CC);
726 }
727 }
728
729 // Use divmod compiler-rt calls for iOS 5.0 and later.
730 if (Subtarget->isTargetMachO() &&
731 !(Subtarget->isTargetIOS() &&
732 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
733 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
734 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
735 }
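// __divmodsi4 / __udivmodsi4 compute the quotient and the remainder in a
// single call, so a matching div + rem pair on these iOS targets can be
// combined into one libcall instead of two.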
736
737 // The half <-> float conversion functions are always soft-float on
738 // non-watchos platforms, but are needed for some targets which use a
739 // hard-float calling convention by default.
740 if (!Subtarget->isTargetWatchABI()) {
741 if (Subtarget->isAAPCS_ABI()) {
742 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
743 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
744 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
745 } else {
746 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
747 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
748 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
749 }
750 }
751
752 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
753 // a __gnu_ prefix (which is the default).
754 if (Subtarget->isTargetAEABI()) {
755 static const struct {
756 const RTLIB::Libcall Op;
757 const char * const Name;
758 const CallingConv::ID CC;
759 } LibraryCalls[] = {
760 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
761 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
762 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
763 };
764
765 for (const auto &LC : LibraryCalls) {
766 setLibcallName(LC.Op, LC.Name);
767 setLibcallCallingConv(LC.Op, LC.CC);
768 }
769 }
770
771 if (Subtarget->isThumb1Only())
772 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
773 else
774 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
775
776 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
777 Subtarget->hasFPRegs()) {
778 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
779 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
780
785
786 if (!Subtarget->hasVFP2Base())
787 setAllExpand(MVT::f32);
788 if (!Subtarget->hasFP64())
789 setAllExpand(MVT::f64);
790 }
791
792 if (Subtarget->hasFullFP16()) {
793 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
796
799 }
800
801 if (Subtarget->hasBF16()) {
802 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
803 setAllExpand(MVT::bf16);
804 if (!Subtarget->hasFullFP16())
806 }
807
809 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
810 setTruncStoreAction(VT, InnerVT, Expand);
811 addAllExtLoads(VT, InnerVT, Expand);
812 }
813
816
818 }
819
822
825
826 if (Subtarget->hasMVEIntegerOps())
827 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
828
829 // Combine low-overhead loop intrinsics so that we can lower i1 types.
830 if (Subtarget->hasLOB()) {
832 }
833
834 if (Subtarget->hasNEON()) {
835 addDRTypeForNEON(MVT::v2f32);
836 addDRTypeForNEON(MVT::v8i8);
837 addDRTypeForNEON(MVT::v4i16);
838 addDRTypeForNEON(MVT::v2i32);
839 addDRTypeForNEON(MVT::v1i64);
840
841 addQRTypeForNEON(MVT::v4f32);
842 addQRTypeForNEON(MVT::v2f64);
843 addQRTypeForNEON(MVT::v16i8);
844 addQRTypeForNEON(MVT::v8i16);
845 addQRTypeForNEON(MVT::v4i32);
846 addQRTypeForNEON(MVT::v2i64);
847
848 if (Subtarget->hasFullFP16()) {
849 addQRTypeForNEON(MVT::v8f16);
850 addDRTypeForNEON(MVT::v4f16);
851 }
852
853 if (Subtarget->hasBF16()) {
854 addQRTypeForNEON(MVT::v8bf16);
855 addDRTypeForNEON(MVT::v4bf16);
856 }
857 }
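// addDRTypeForNEON places the 64-bit vector types in the D (double-word)
// register file and addQRTypeForNEON places the 128-bit types in D-register
// pairs (Q registers), with their loads/stores promoted to f64 and v2f64
// respectively, as set up in addTypeForNEON above.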
858
859 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
860 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
861 // none of Neon, MVE or VFP supports any arithmetic operations on it.
862 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
863 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
864 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
865 // FIXME: Code duplication: FDIV and FREM are expanded always, see
866 // ARMTargetLowering::addTypeForNEON method for details.
867 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
868 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
869 // FIXME: Create unittest.
870 // In other words, find a case where "copysign" appears in a DAG with vector
871 // operands.
873 // FIXME: Code duplication: SETCC has custom operation action, see
874 // ARMTargetLowering::addTypeForNEON method for details.
876 // FIXME: Create unittest for FNEG and for FABS.
877 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
878 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
880 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
881 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
882 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
883 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
884 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
887 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
890 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
896 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
897 }
898
899 if (Subtarget->hasNEON()) {
900 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
901 // supported for v4f32.
903 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
904 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
905 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
906 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
907 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
910 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
918
919 // Mark v2f32 intrinsics.
921 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
922 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
923 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
924 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
925 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
928 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
936
937 // Neon does not support some operations on v1i64 and v2i64 types.
938 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
939 // Custom handling for some quad-vector types to detect VMULL.
940 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
941 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
942 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
943 // Custom handling for some vector types to avoid expensive expansions
944 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
946 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
948 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
949 // a destination type that is wider than the source, nor does
950 // it have a FP_TO_[SU]INT instruction with a narrower destination than
951 // source.
960
963
964 // NEON does not have a single-instruction CTPOP for vectors with element
965 // types wider than 8 bits. However, custom lowering can leverage the
966 // v8i8/v16i8 vcnt instruction.
973
974 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
975 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
976
977 // NEON does not have a single-instruction CTTZ for vectors.
979 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
980 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
981 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
982
983 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
984 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
985 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
986 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
987
992
997
1001 }
1002
1003 // NEON only has FMA instructions as of VFP4.
1004 if (!Subtarget->hasVFP4Base()) {
1005 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
1006 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1007 }
1008
1011
1012 // It is legal to extload from v4i8 to v4i16 or v4i32.
1013 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1014 MVT::v2i32}) {
1019 }
1020 }
1021
1022 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1023 MVT::v4i32}) {
1028 }
1029 }
1030
1031 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1038 }
1039 if (Subtarget->hasMVEIntegerOps()) {
1042 ISD::SETCC});
1043 }
1044 if (Subtarget->hasMVEFloatOps()) {
1046 }
1047
1048 if (!Subtarget->hasFP64()) {
1049 // When targeting a floating-point unit with only single-precision
1050 // operations, f64 is legal for the few double-precision instructions which
1051 // are present. However, no double-precision operations other than moves,
1052 // loads and stores are provided by the hardware.
1090 }
1091
1092 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1095 if (Subtarget->hasFullFP16()) {
1098 }
1099 }
1100
1101 if (!Subtarget->hasFP16()) {
1104 }
1105
1107
1108 // ARM does not have floating-point extending loads.
1109 for (MVT VT : MVT::fp_valuetypes()) {
1110 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1111 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1112 }
1113
1114 // ... or truncating stores
1115 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1116 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1117 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1118
1119 // ARM does not have an i1 sign-extending load.
1120 for (MVT VT : MVT::integer_valuetypes())
1121 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1122
1123 // ARM supports all 4 flavors of integer indexed load / store.
1124 if (!Subtarget->isThumb1Only()) {
1125 for (unsigned im = (unsigned)ISD::PRE_INC;
1127 setIndexedLoadAction(im, MVT::i1, Legal);
1128 setIndexedLoadAction(im, MVT::i8, Legal);
1129 setIndexedLoadAction(im, MVT::i16, Legal);
1130 setIndexedLoadAction(im, MVT::i32, Legal);
1131 setIndexedStoreAction(im, MVT::i1, Legal);
1132 setIndexedStoreAction(im, MVT::i8, Legal);
1133 setIndexedStoreAction(im, MVT::i16, Legal);
1134 setIndexedStoreAction(im, MVT::i32, Legal);
1135 }
1136 } else {
1137 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1140 }
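// For reference, the four indexed flavours correspond to ARM addressing modes
// such as the pre-indexed "ldr r0, [r1, #4]!" (base updated before the
// access) and the post-indexed "ldr r0, [r1], #4" (base updated afterwards);
// Thumb-1 only gets the limited post-increment form noted above.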
1141
1146
1149 if (Subtarget->hasDSP()) {
1158 }
1159 if (Subtarget->hasBaseDSP()) {
1162 }
1163
1164 // i64 operation support.
1167 if (Subtarget->isThumb1Only()) {
1170 }
1171 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1172 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1174
1184
1185 // MVE lowers 64 bit shifts to lsll and lsrl
1186 // assuming that ISD::SRL and SRA of i64 are already marked custom
1187 if (Subtarget->hasMVEIntegerOps())
1189
1190 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1191 if (Subtarget->isThumb1Only()) {
1195 }
1196
1197 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1199
1200 // ARM does not have ROTL.
1205 }
1208 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1211 }
1212
1213 // @llvm.readcyclecounter requires the Performance Monitors extension.
1214 // Default to the 0 expansion on unsupported platforms.
1215 // FIXME: Technically there are older ARM CPUs that have
1216 // implementation-specific ways of obtaining this information.
1217 if (Subtarget->hasPerfMon())
1219
1220 // Only ARMv6 and later have BSWAP (the REV instruction).
1221 if (!Subtarget->hasV6Ops())
1223
1224 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1225 : Subtarget->hasDivideInARMMode();
1226 if (!hasDivide) {
1227 // These are expanded into libcalls if the CPU doesn't have a HW divider.
1230 }
1231
1232 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1235
1238 }
1239
1242
1243 // Register based DivRem for AEABI (RTABI 4.2)
1244 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1245 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1246 Subtarget->isTargetWindows()) {
1249 HasStandaloneRem = false;
1250
1251 if (Subtarget->isTargetWindows()) {
1252 const struct {
1253 const RTLIB::Libcall Op;
1254 const char * const Name;
1255 const CallingConv::ID CC;
1256 } LibraryCalls[] = {
1257 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1258 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1259 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1260 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1261
1262 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1263 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1264 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1265 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1266 };
1267
1268 for (const auto &LC : LibraryCalls) {
1269 setLibcallName(LC.Op, LC.Name);
1270 setLibcallCallingConv(LC.Op, LC.CC);
1271 }
1272 } else {
1273 const struct {
1274 const RTLIB::Libcall Op;
1275 const char * const Name;
1276 const CallingConv::ID CC;
1277 } LibraryCalls[] = {
1278 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1279 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1280 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1281 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1282
1283 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1284 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1285 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1286 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1287 };
1288
1289 for (const auto &LC : LibraryCalls) {
1290 setLibcallName(LC.Op, LC.Name);
1291 setLibcallCallingConv(LC.Op, LC.CC);
1292 }
1293 }
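// __aeabi_{u}idivmod and __aeabi_{u}ldivmod return the quotient and the
// remainder together (r0/r1, or r0-r3 for the 64-bit variants), which is why
// HasStandaloneRem is cleared above: a remainder is recovered from the
// combined divmod call rather than from a separate helper.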
1294
1299 } else {
1302 }
1303
1308
1309 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1311
1312 // Use the default implementation.
1314 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1316 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1319
1320 if (Subtarget->isTargetWindows())
1322 else
1324
1325 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1326 // the default expansion.
1327 InsertFencesForAtomic = false;
1328 if (Subtarget->hasAnyDataBarrier() &&
1329 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1330 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1331 // to ldrex/strex loops already.
1333 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1335
1336 // On v8, we have particularly efficient implementations of atomic fences
1337 // if they can be combined with nearby atomic loads and stores.
1338 if (!Subtarget->hasAcquireRelease() ||
1339 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1340 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1341 InsertFencesForAtomic = true;
1342 }
1343 } else {
1344 // If there's anything we can use as a barrier, go through custom lowering
1345 // for ATOMIC_FENCE.
1346 // If the target has DMB in Thumb, fences can be inserted.
1347 if (Subtarget->hasDataBarrier())
1348 InsertFencesForAtomic = true;
1349
1351 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1352
1353 // Set them all for libcall, which will force libcalls.
1366 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1367 // Unordered/Monotonic case.
1368 if (!InsertFencesForAtomic) {
1371 }
1372 }
1373
1374 // Compute supported atomic widths.
1375 if (Subtarget->isTargetLinux() ||
1376 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1377 // For targets where __sync_* routines are reliably available, we use them
1378 // if necessary.
1379 //
1380 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1381 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1382 //
1383 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1384 // such targets should provide __sync_* routines, which use the ARM mode
1385 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1386 // encoding; see ARMISD::MEMBARRIER_MCR.)
1388 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1389 Subtarget->hasForced32BitAtomics()) {
1390 // Cortex-M cores (other than Cortex-M0) have 32-bit atomics.
1392 } else {
1393 // We can't assume anything about other targets; just use libatomic
1394 // routines.
1396 }
1397
1399
1401
1402 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1403 if (!Subtarget->hasV6Ops()) {
1406 }
1408
1409 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1410 !Subtarget->isThumb1Only()) {
1411 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1412 // iff target supports vfp2.
1422 }
1423
1424 // We want to custom lower some of our intrinsics.
1429 if (Subtarget->useSjLjEH())
1430 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1431
1441 if (Subtarget->hasFullFP16()) {
1445 }
1446
1448
1451 if (Subtarget->hasFullFP16())
1455 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1456
1457 // We don't support sin/cos/fmod/copysign/pow
1466 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1467 !Subtarget->isThumb1Only()) {
1470 }
1473
1474 if (!Subtarget->hasVFP4Base()) {
1477 }
1478
1479 // Various VFP goodness
1480 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1481 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1482 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1485 }
1486
1487 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1488 if (!Subtarget->hasFP16()) {
1491 }
1492
1493 // Strict floating-point comparisons need custom lowering.
1500 }
1501
1502 // Use __sincos_stret if available.
1503 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1504 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1507 }
1508
1509 // FP-ARMv8 implements a lot of rounding-like FP operations.
1510 if (Subtarget->hasFPARMv8Base()) {
1519 if (Subtarget->hasNEON()) {
1524 }
1525
1526 if (Subtarget->hasFP64()) {
1535 }
1536 }
1537
1538 // FP16 often needs to be promoted to call library functions
1539 if (Subtarget->hasFullFP16()) {
1554
1556 }
1557
1558 if (Subtarget->hasNEON()) {
1559 // vmin and vmax aren't available in a scalar form, so we can use
1560 // a NEON instruction with an undef lane instead.
1569
1570 if (Subtarget->hasFullFP16()) {
1575
1580 }
1581 }
1582
1583 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1584 // it, but it's just a wrapper around ldexp.
1585 if (Subtarget->isTargetWindows()) {
1587 if (isOperationExpand(Op, MVT::f32))
1588 setOperationAction(Op, MVT::f32, Promote);
1589 }
1590
1591 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1592 // isn't legal.
1594 if (isOperationExpand(Op, MVT::f16))
1595 setOperationAction(Op, MVT::f16, Promote);
1596
1597 // We have target-specific dag combine patterns for the following nodes:
1598 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1601
1602 if (Subtarget->hasMVEIntegerOps())
1604
1605 if (Subtarget->hasV6Ops())
1607 if (Subtarget->isThumb1Only())
1609 // Attempt to lower smin/smax to ssat/usat
1610 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1611 Subtarget->isThumb2()) {
1613 }
1614
1616
1617 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1618 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1620 else
1622
1623 //// temporary - rewrite interface to use type
1626 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1628 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1630
1631 // On ARM, arguments smaller than 4 bytes are extended, so all arguments
1632 // are at least 4 bytes aligned.
1634
1635 // Prefer likely predicted branches to selects on out-of-order cores.
1636 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1637
1638 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1640
1641 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1642}
1643
1645 return Subtarget->useSoftFloat();
1646}
1647
1648// FIXME: It might make sense to define the representative register class as the
1649// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1650// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1651// SPR's representative would be DPR_VFP2. This should work well if register
1652// pressure tracking were modified such that a register use would increment the
1653// pressure of the register class's representative and all of its super
1654// classes' representatives transitively. We have not implemented this because
1655// of the difficulty prior to coalescing of modeling operand register classes
1656// due to the common occurrence of cross class copies and subregister insertions
1657// and extractions.
1658std::pair<const TargetRegisterClass *, uint8_t>
1660 MVT VT) const {
1661 const TargetRegisterClass *RRC = nullptr;
1662 uint8_t Cost = 1;
1663 switch (VT.SimpleTy) {
1664 default:
1666 // Use DPR as representative register class for all floating point
1667 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1668 // the cost is 1 for both f32 and f64.
1669 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1670 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1671 RRC = &ARM::DPRRegClass;
1672 // When NEON is used for SP, only half of the register file is available
1673 // because operations that define both SP and DP results will be constrained
1674 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1675 // coalescing by double-counting the SP regs. See the FIXME above.
1676 if (Subtarget->useNEONForSinglePrecisionFP())
1677 Cost = 2;
1678 break;
1679 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1680 case MVT::v4f32: case MVT::v2f64:
1681 RRC = &ARM::DPRRegClass;
1682 Cost = 2;
1683 break;
1684 case MVT::v4i64:
1685 RRC = &ARM::DPRRegClass;
1686 Cost = 4;
1687 break;
1688 case MVT::v8i64:
1689 RRC = &ARM::DPRRegClass;
1690 Cost = 8;
1691 break;
1692 }
1693 return std::make_pair(RRC, Cost);
1694}
1695
1696const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1697#define MAKE_CASE(V) \
1698 case V: \
1699 return #V;
1700 switch ((ARMISD::NodeType)Opcode) {
1702 break;
1905#undef MAKE_CASE
1906 }
1907 return nullptr;
1908}
1909
1911 EVT VT) const {
1912 if (!VT.isVector())
1913 return getPointerTy(DL);
1914
1915 // MVE has a predicate register.
1916 if ((Subtarget->hasMVEIntegerOps() &&
1917 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1918 VT == MVT::v16i8)) ||
1919 (Subtarget->hasMVEFloatOps() &&
1920 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1921 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1923}
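// Example: with MVE, a setcc on v4i32 yields a v4i1 predicate mask (held in
// VCCR) rather than a widened integer vector, while scalar comparisons still
// produce the pointer-sized integer type returned above.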
1924
1925/// getRegClassFor - Return the register class that should be used for the
1926/// specified value type.
1927const TargetRegisterClass *
1928ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1929 (void)isDivergent;
1930 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1931 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1932 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1933 // MVE Q registers.
1934 if (Subtarget->hasNEON()) {
1935 if (VT == MVT::v4i64)
1936 return &ARM::QQPRRegClass;
1937 if (VT == MVT::v8i64)
1938 return &ARM::QQQQPRRegClass;
1939 }
1940 if (Subtarget->hasMVEIntegerOps()) {
1941 if (VT == MVT::v4i64)
1942 return &ARM::MQQPRRegClass;
1943 if (VT == MVT::v8i64)
1944 return &ARM::MQQQQPRRegClass;
1945 }
1947}
1948
1949// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1950// source/dest is aligned and the copy size is large enough. We therefore want
1951// to align such objects passed to memory intrinsics.
1953 Align &PrefAlign) const {
1954 if (!isa<MemIntrinsic>(CI))
1955 return false;
1956 MinSize = 8;
1957 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1958 // cycle faster than 4-byte aligned LDM.
1959 PrefAlign =
1960 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1961 return true;
1962}
1963
1964// Create a fast isel object.
1965FastISel *
1967 const TargetLibraryInfo *libInfo) const {
1968 return ARM::createFastISel(funcInfo, libInfo);
1969}
1970
1972 unsigned NumVals = N->getNumValues();
1973 if (!NumVals)
1974 return Sched::RegPressure;
1975
1976 for (unsigned i = 0; i != NumVals; ++i) {
1977 EVT VT = N->getValueType(i);
1978 if (VT == MVT::Glue || VT == MVT::Other)
1979 continue;
1980 if (VT.isFloatingPoint() || VT.isVector())
1981 return Sched::ILP;
1982 }
1983
1984 if (!N->isMachineOpcode())
1985 return Sched::RegPressure;
1986
1987 // Loads are scheduled for latency even if the instruction itinerary
1988 // is not available.
1989 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1990 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1991
1992 if (MCID.getNumDefs() == 0)
1993 return Sched::RegPressure;
1994 if (!Itins->isEmpty() &&
1995 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
1996 return Sched::ILP;
1997
1998 return Sched::RegPressure;
1999}
2000
2001//===----------------------------------------------------------------------===//
2002// Lowering Code
2003//===----------------------------------------------------------------------===//
2004
2005static bool isSRL16(const SDValue &Op) {
2006 if (Op.getOpcode() != ISD::SRL)
2007 return false;
2008 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2009 return Const->getZExtValue() == 16;
2010 return false;
2011}
2012
2013static bool isSRA16(const SDValue &Op) {
2014 if (Op.getOpcode() != ISD::SRA)
2015 return false;
2016 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2017 return Const->getZExtValue() == 16;
2018 return false;
2019}
2020
2021static bool isSHL16(const SDValue &Op) {
2022 if (Op.getOpcode() != ISD::SHL)
2023 return false;
2024 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2025 return Const->getZExtValue() == 16;
2026 return false;
2027}
2028
2029// Check for a signed 16-bit value. We special-case SRA because it makes
2030// things simpler when also looking for SRAs that aren't sign-extending a
2031// smaller value. Without the check, we'd need to take extra care with
2032// checking order for some operations.
2033static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2034 if (isSRA16(Op))
2035 return isSHL16(Op.getOperand(0));
2036 return DAG.ComputeNumSignBits(Op) == 17;
2037}
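// Rationale (roughly): a 32-bit value with 17 or more known sign bits fits in
// a signed 16-bit range, so the ComputeNumSignBits check, together with the
// explicit shl-by-16 / sra-by-16 pattern, spots operands that can be treated
// as signed halfwords, e.g. when forming the SMULxy-style multiplies.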
2038
2039/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2041 switch (CC) {
2042 default: llvm_unreachable("Unknown condition code!");
2043 case ISD::SETNE: return ARMCC::NE;
2044 case ISD::SETEQ: return ARMCC::EQ;
2045 case ISD::SETGT: return ARMCC::GT;
2046 case ISD::SETGE: return ARMCC::GE;
2047 case ISD::SETLT: return ARMCC::LT;
2048 case ISD::SETLE: return ARMCC::LE;
2049 case ISD::SETUGT: return ARMCC::HI;
2050 case ISD::SETUGE: return ARMCC::HS;
2051 case ISD::SETULT: return ARMCC::LO;
2052 case ISD::SETULE: return ARMCC::LS;
2053 }
2054}
2055
2056/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2058 ARMCC::CondCodes &CondCode2) {
2059 CondCode2 = ARMCC::AL;
2060 switch (CC) {
2061 default: llvm_unreachable("Unknown FP condition!");
2062 case ISD::SETEQ:
2063 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2064 case ISD::SETGT:
2065 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2066 case ISD::SETGE:
2067 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2068 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2069 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2070 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2071 case ISD::SETO: CondCode = ARMCC::VC; break;
2072 case ISD::SETUO: CondCode = ARMCC::VS; break;
2073 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2074 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2075 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2076 case ISD::SETLT:
2077 case ISD::SETULT: CondCode = ARMCC::LT; break;
2078 case ISD::SETLE:
2079 case ISD::SETULE: CondCode = ARMCC::LE; break;
2080 case ISD::SETNE:
2081 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2082 }
2083}
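// Some unordered FP conditions have no single ARM condition code, so a second
// code is returned through CondCode2: e.g. SETUEQ becomes "EQ or VS" (equal,
// or the compare was unordered) and SETONE becomes "MI or GT".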
2084
2085//===----------------------------------------------------------------------===//
2086// Calling Convention Implementation
2087//===----------------------------------------------------------------------===//
2088
2089/// getEffectiveCallingConv - Get the effective calling convention, taking into
2090/// account presence of floating point hardware and calling convention
2091/// limitations, such as support for variadic functions.
2093ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2094 bool isVarArg) const {
2095 switch (CC) {
2096 default:
2097 report_fatal_error("Unsupported calling convention");
2100 case CallingConv::GHC:
2102 return CC;
2108 case CallingConv::Swift:
2111 case CallingConv::C:
2112 case CallingConv::Tail:
2113 if (!Subtarget->isAAPCS_ABI())
2114 return CallingConv::ARM_APCS;
2115 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2116 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2117 !isVarArg)
2119 else
2121 case CallingConv::Fast:
2123 if (!Subtarget->isAAPCS_ABI()) {
2124 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2125 return CallingConv::Fast;
2126 return CallingConv::ARM_APCS;
2127 } else if (Subtarget->hasVFP2Base() &&
2128 !Subtarget->isThumb1Only() && !isVarArg)
2130 else
2132 }
2133}
2134
2136 bool isVarArg) const {
2137 return CCAssignFnForNode(CC, false, isVarArg);
2138}
2139
2141 bool isVarArg) const {
2142 return CCAssignFnForNode(CC, true, isVarArg);
2143}
2144
2145/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2146/// CallingConvention.
2147CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2148 bool Return,
2149 bool isVarArg) const {
2150 switch (getEffectiveCallingConv(CC, isVarArg)) {
2151 default:
2152 report_fatal_error("Unsupported calling convention");
2154 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2156 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2158 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2159 case CallingConv::Fast:
2160 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2161 case CallingConv::GHC:
2162 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2164 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2166 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2168 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2169 }
2170}
2171
2172SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2173 MVT LocVT, MVT ValVT, SDValue Val) const {
2174 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2175 Val);
2176 if (Subtarget->hasFullFP16()) {
2177 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2178 } else {
2179 Val = DAG.getNode(ISD::TRUNCATE, dl,
2180 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2181 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2182 }
2183 return Val;
2184}
2185
2186SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2187 MVT LocVT, MVT ValVT,
2188 SDValue Val) const {
2189 if (Subtarget->hasFullFP16()) {
2190 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2191 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2192 } else {
2193 Val = DAG.getNode(ISD::BITCAST, dl,
2194 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2195 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2196 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2197 }
2198 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2199}
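// MoveToHPR / MoveFromHPR implement the convention (also noted in
// LowerCallResult) that f16/bf16 values live in the low 16 bits of a 32-bit
// location: with full FP16 the VMOVhr/VMOVrh nodes move directly between the
// integer view and a half-precision register, otherwise the value is shuffled
// through integer truncate/zero-extend and bitcasts.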
2200
2201/// LowerCallResult - Lower the result values of a call into the
2202/// appropriate copies out of appropriate physical registers.
2203SDValue ARMTargetLowering::LowerCallResult(
2204 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2205 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2206 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2207 SDValue ThisVal, bool isCmseNSCall) const {
2208 // Assign locations to each value returned by this call.
2210 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2211 *DAG.getContext());
2212 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2213
2214 // Copy all of the result registers out of their specified physreg.
2215 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2216 CCValAssign VA = RVLocs[i];
2217
2218 // Pass 'this' value directly from the argument to return value, to avoid
2219 // reg unit interference
2220 if (i == 0 && isThisReturn) {
2221 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2222 "unexpected return calling convention register assignment");
2223 InVals.push_back(ThisVal);
2224 continue;
2225 }
2226
2227 SDValue Val;
2228 if (VA.needsCustom() &&
2229 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2230 // Handle f64 or half of a v2f64.
2231 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2232 InGlue);
2233 Chain = Lo.getValue(1);
2234 InGlue = Lo.getValue(2);
2235 VA = RVLocs[++i]; // skip ahead to next loc
2236 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2237 InGlue);
2238 Chain = Hi.getValue(1);
2239 InGlue = Hi.getValue(2);
2240 if (!Subtarget->isLittle())
2241 std::swap (Lo, Hi);
2242 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2243
2244 if (VA.getLocVT() == MVT::v2f64) {
2245 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2246 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2247 DAG.getConstant(0, dl, MVT::i32));
2248
2249 VA = RVLocs[++i]; // skip ahead to next loc
2250 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2251 Chain = Lo.getValue(1);
2252 InGlue = Lo.getValue(2);
2253 VA = RVLocs[++i]; // skip ahead to next loc
2254 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2255 Chain = Hi.getValue(1);
2256 InGlue = Hi.getValue(2);
2257 if (!Subtarget->isLittle())
2258 std::swap (Lo, Hi);
2259 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2260 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2261 DAG.getConstant(1, dl, MVT::i32));
2262 }
2263 } else {
2264 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2265 InGlue);
2266 Chain = Val.getValue(1);
2267 InGlue = Val.getValue(2);
2268 }
2269
2270 switch (VA.getLocInfo()) {
2271 default: llvm_unreachable("Unknown loc info!");
2272 case CCValAssign::Full: break;
2273 case CCValAssign::BCvt:
2274 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2275 break;
2276 }
2277
2278 // f16 arguments have their size extended to 4 bytes and passed as if they
2279 // had been copied to the LSBs of a 32-bit register.
2280 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2281 if (VA.needsCustom() &&
2282 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2283 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2284
2285 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2286 // is less than 32 bits must be sign- or zero-extended after the call for
2287 // security reasons. Although the ABI mandates an extension done by the
2288 // callee, the latter cannot be trusted to follow the rules of the ABI.
2289 const ISD::InputArg &Arg = Ins[VA.getValNo()];
2290 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
2291 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
2292 Val = handleCMSEValue(Val, Arg, DAG, dl);
2293
2294 InVals.push_back(Val);
2295 }
2296
2297 return Chain;
2298}
2299
2300std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2301 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2302 bool IsTailCall, int SPDiff) const {
2303 SDValue DstAddr;
2304 MachinePointerInfo DstInfo;
2305 int32_t Offset = VA.getLocMemOffset();
2307
2308 if (IsTailCall) {
2309 Offset += SPDiff;
2310 auto PtrVT = getPointerTy(DAG.getDataLayout());
2311 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2312 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2313 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2314 DstInfo =
2316 } else {
2317 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2318 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2319 StackPtr, PtrOff);
2320 DstInfo =
2322 }
2323
2324 return std::make_pair(DstAddr, DstInfo);
2325}
2326
2327void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2328 SDValue Chain, SDValue &Arg,
2329 RegsToPassVector &RegsToPass,
2330 CCValAssign &VA, CCValAssign &NextVA,
2331 SDValue &StackPtr,
2332 SmallVectorImpl<SDValue> &MemOpChains,
2333 bool IsTailCall,
2334 int SPDiff) const {
2335 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2336 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2337 unsigned id = Subtarget->isLittle() ? 0 : 1;
2338 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2339
2340 if (NextVA.isRegLoc())
2341 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2342 else {
2343 assert(NextVA.isMemLoc());
2344 if (!StackPtr.getNode())
2345 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2347
2348 SDValue DstAddr;
2349 MachinePointerInfo DstInfo;
2350 std::tie(DstAddr, DstInfo) =
2351 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2352 MemOpChains.push_back(
2353 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2354 }
2355}
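// PassF64ArgInRegs splits an f64 (or one half of a v2f64) into two i32 halves
// with VMOVRRD: the first half lands in VA's register and the second either
// in NextVA's register or at the computed stack slot, with the halves swapped
// on big-endian targets (the "id" selection above).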
2356
2357static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2358 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2359 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2360}
2361
2362/// LowerCall - Lowering a call into a callseq_start <-
2363/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2364/// nodes.
2365SDValue
2366ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2367 SmallVectorImpl<SDValue> &InVals) const {
2368 SelectionDAG &DAG = CLI.DAG;
2369 SDLoc &dl = CLI.DL;
2370 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2371 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2372 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2373 SDValue Chain = CLI.Chain;
2374 SDValue Callee = CLI.Callee;
2375 bool &isTailCall = CLI.IsTailCall;
2376 CallingConv::ID CallConv = CLI.CallConv;
2377 bool doesNotRet = CLI.DoesNotReturn;
2378 bool isVarArg = CLI.IsVarArg;
2379
2380 MachineFunction &MF = DAG.getMachineFunction();
2381 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2382 MachineFunction::CallSiteInfo CSInfo;
2383 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2384 bool isThisReturn = false;
2385 bool isCmseNSCall = false;
2386 bool isSibCall = false;
2387 bool PreferIndirect = false;
2388 bool GuardWithBTI = false;
2389
2390 // Analyze operands of the call, assigning locations to each operand.
2391 SmallVector<CCValAssign, 16> ArgLocs;
2392 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2393 *DAG.getContext());
2394 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2395
2396 // Lower 'returns_twice' calls to a pseudo-instruction.
2397 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2398 !Subtarget->noBTIAtReturnTwice())
2399 GuardWithBTI = AFI->branchTargetEnforcement();
2400
2401 // Determine whether this is a non-secure function call.
2402 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2403 isCmseNSCall = true;
2404
2405 // Disable tail calls if they're not supported.
2406 if (!Subtarget->supportsTailCall())
2407 isTailCall = false;
2408
2409 // For both the non-secure calls and the returns from a CMSE entry function,
2410 // the function needs to do some extra work after the call, or before the
2411 // return, respectively; thus it cannot end with a tail call.
2412 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2413 isTailCall = false;
2414
2415 if (isa<GlobalAddressSDNode>(Callee)) {
2416 // If we're optimizing for minimum size and the function is called three or
2417 // more times in this block, we can improve codesize by calling indirectly
2418 // as BLXr has a 16-bit encoding.
2419 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2420 if (CLI.CB) {
2421 auto *BB = CLI.CB->getParent();
2422 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2423 count_if(GV->users(), [&BB](const User *U) {
2424 return isa<Instruction>(U) &&
2425 cast<Instruction>(U)->getParent() == BB;
2426 }) > 2;
2427 }
2428 }
2429 if (isTailCall) {
2430 // Check if it's really possible to do a tail call.
2431 isTailCall =
2432 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2433
2434 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2435 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2436 isSibCall = true;
2437
2438 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2439 // detected sibcalls.
2440 if (isTailCall)
2441 ++NumTailCalls;
2442 }
2443
2444 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2445 report_fatal_error("failed to perform tail call elimination on a call "
2446 "site marked musttail");
2447
2448 // Get a count of how many bytes are to be pushed on the stack.
2449 unsigned NumBytes = CCInfo.getStackSize();
2450
2451 // SPDiff is the byte offset of the call's argument area from the callee's.
2452 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2453 // by this amount for a tail call. In a sibling call it must be 0 because the
2454 // caller will deallocate the entire stack and the callee still expects its
2455 // arguments to begin at SP+0. Completely unused for non-tail calls.
2456 int SPDiff = 0;
2457
2458 if (isTailCall && !isSibCall) {
2459 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2460 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2461
2462 // Since the callee will pop the argument stack as a tail call, we must keep
2463 // the popped size aligned to the stack alignment.
2464 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2465 NumBytes = alignTo(NumBytes, StackAlign);
2466
2467 // SPDiff will be negative if this tail call requires more space than we
2468 // would automatically have in our incoming argument space. Positive if we
2469 // can actually shrink the stack.
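 // Illustrative example (not from the source): if the caller was entered with
 // NumReusableBytes == 8 bytes of incoming stack arguments and this tail call
 // needs NumBytes == 24 bytes, then SPDiff == 8 - 24 == -16 and an extra
 // 16 bytes of argument save area must be reserved below.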
2470 SPDiff = NumReusableBytes - NumBytes;
2471
2472 // If this call requires more stack than we have available from
2473 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2474 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2475 AFI->setArgRegsSaveSize(-SPDiff);
2476 }
2477
2478 if (isSibCall) {
2479 // For sibling tail calls, memory operands are available in our caller's stack.
2480 NumBytes = 0;
2481 } else {
2482 // Adjust the stack pointer for the new arguments...
2483 // These operations are automatically eliminated by the prolog/epilog pass
2484 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2485 }
2486
2487 SDValue StackPtr =
2488 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2489
2490 RegsToPassVector RegsToPass;
2491 SmallVector<SDValue, 8> MemOpChains;
2492
2493 // During a tail call, stores to the argument area must happen after all of
2494 // the function's incoming arguments have been loaded because they may alias.
2495 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2496 // there's no point in doing so repeatedly so this tracks whether that's
2497 // happened yet.
2498 bool AfterFormalArgLoads = false;
2499
2500 // Walk the register/memloc assignments, inserting copies/loads. In the case
2501 // of tail call optimization, arguments are handled later.
2502 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2503 i != e;
2504 ++i, ++realArgIdx) {
2505 CCValAssign &VA = ArgLocs[i];
2506 SDValue Arg = OutVals[realArgIdx];
2507 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2508 bool isByVal = Flags.isByVal();
2509
2510 // Promote the value if needed.
2511 switch (VA.getLocInfo()) {
2512 default: llvm_unreachable("Unknown loc info!");
2513 case CCValAssign::Full: break;
2514 case CCValAssign::SExt:
2515 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2516 break;
2517 case CCValAssign::ZExt:
2518 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2519 break;
2520 case CCValAssign::AExt:
2521 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2522 break;
2523 case CCValAssign::BCvt:
2524 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2525 break;
2526 }
2527
2528 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2529 Chain = DAG.getStackArgumentTokenFactor(Chain);
2530 AfterFormalArgLoads = true;
2531 }
2532
2533 // f16 arguments have their size extended to 4 bytes and passed as if they
2534 // had been copied to the LSBs of a 32-bit register.
2535 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2536 if (VA.needsCustom() &&
2537 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2538 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2539 } else {
2540 // f16 arguments could have been extended prior to argument lowering.
2541 // Mask these arguments if this is a CMSE nonsecure call.
2542 auto ArgVT = Outs[realArgIdx].ArgVT;
2543 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2544 auto LocBits = VA.getLocVT().getSizeInBits();
2545 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2546 SDValue Mask =
2547 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2548 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2549 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2550 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2551 }
2552 }
2553
2554 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2555 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2556 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2557 DAG.getConstant(0, dl, MVT::i32));
2558 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2559 DAG.getConstant(1, dl, MVT::i32));
2560
2561 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2562 StackPtr, MemOpChains, isTailCall, SPDiff);
2563
2564 VA = ArgLocs[++i]; // skip ahead to next loc
2565 if (VA.isRegLoc()) {
2566 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2567 StackPtr, MemOpChains, isTailCall, SPDiff);
2568 } else {
2569 assert(VA.isMemLoc());
2570 SDValue DstAddr;
2571 MachinePointerInfo DstInfo;
2572 std::tie(DstAddr, DstInfo) =
2573 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2574 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2575 }
2576 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2577 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2578 StackPtr, MemOpChains, isTailCall, SPDiff);
2579 } else if (VA.isRegLoc()) {
2580 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2581 Outs[0].VT == MVT::i32) {
2582 assert(VA.getLocVT() == MVT::i32 &&
2583 "unexpected calling convention register assignment");
2584 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2585 "unexpected use of 'returned'");
2586 isThisReturn = true;
2587 }
2588 const TargetOptions &Options = DAG.getTarget().Options;
2589 if (Options.EmitCallSiteInfo)
2590 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2591 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2592 } else if (isByVal) {
2593 assert(VA.isMemLoc());
2594 unsigned offset = 0;
2595
2596 // True if this byval aggregate will be split between registers
2597 // and memory.
2598 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2599 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2600
2601 if (CurByValIdx < ByValArgsCount) {
2602
2603 unsigned RegBegin, RegEnd;
2604 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2605
2606 EVT PtrVT =
2608 unsigned int i, j;
2609 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2610 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2611 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2612 SDValue Load =
2613 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2614 DAG.InferPtrAlign(AddArg));
2615 MemOpChains.push_back(Load.getValue(1));
2616 RegsToPass.push_back(std::make_pair(j, Load));
2617 }
2618
2619 // If the parameter size extends beyond the register area, the "offset" value
2620 // helps us calculate the stack slot for the remaining part properly.
2621 offset = RegEnd - RegBegin;
2622
2623 CCInfo.nextInRegsParam();
2624 }
2625
2626 if (Flags.getByValSize() > 4*offset) {
2627 auto PtrVT = getPointerTy(DAG.getDataLayout());
2628 SDValue Dst;
2629 MachinePointerInfo DstInfo;
2630 std::tie(Dst, DstInfo) =
2631 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2632 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2633 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2634 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2635 MVT::i32);
2636 SDValue AlignNode =
2637 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2638
2639 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2640 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2641 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2642 Ops));
2643 }
2644 } else {
2645 assert(VA.isMemLoc());
2646 SDValue DstAddr;
2647 MachinePointerInfo DstInfo;
2648 std::tie(DstAddr, DstInfo) =
2649 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2650
2651 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2652 MemOpChains.push_back(Store);
2653 }
2654 }
2655
2656 if (!MemOpChains.empty())
2657 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2658
2659 // Build a sequence of copy-to-reg nodes chained together with token chain
2660 // and flag operands which copy the outgoing args into the appropriate regs.
2661 SDValue InGlue;
2662 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2663 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2664 RegsToPass[i].second, InGlue);
2665 InGlue = Chain.getValue(1);
2666 }
2667
2668 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2669 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2670 // node so that legalize doesn't hack it.
2671 bool isDirect = false;
2672
2674 const GlobalValue *GVal = nullptr;
2675 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2676 GVal = G->getGlobal();
2677 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2678
2679 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2680 bool isLocalARMFunc = false;
2681 auto PtrVt = getPointerTy(DAG.getDataLayout());
2682
2683 if (Subtarget->genLongCalls()) {
2684 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2685 "long-calls codegen is not position independent!");
2686 // Handle a global address or an external symbol. If it's not one of
2687 // those, the target's already in a register, so we don't need to do
2688 // anything extra.
2689 if (isa<GlobalAddressSDNode>(Callee)) {
2690 if (Subtarget->genExecuteOnly()) {
2691 if (Subtarget->useMovt())
2692 ++NumMovwMovt;
2693 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2694 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2695 } else {
2696 // Create a constant pool entry for the callee address
2697 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2699 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2700
2701 // Get the address of the callee into a register
2702 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2703 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2704 Callee = DAG.getLoad(
2705 PtrVt, dl, DAG.getEntryNode(), Addr,
2707 }
2708 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2709 const char *Sym = S->getSymbol();
2710
2711 if (Subtarget->genExecuteOnly()) {
2712 if (Subtarget->useMovt())
2713 ++NumMovwMovt;
2714 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2715 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2716 } else {
2717 // Create a constant pool entry for the callee address
2718 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2720 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2721
2722 // Get the address of the callee into a register
2723 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2724 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2725 Callee = DAG.getLoad(
2726 PtrVt, dl, DAG.getEntryNode(), Addr,
2728 }
2729 }
2730 } else if (isa<GlobalAddressSDNode>(Callee)) {
2731 if (!PreferIndirect) {
2732 isDirect = true;
2733 bool isDef = GVal->isStrongDefinitionForLinker();
2734
2735 // ARM call to a local ARM function is predicable.
2736 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2737 // tBX takes a register source operand.
2738 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2739 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2740 Callee = DAG.getNode(
2741 ARMISD::WrapperPIC, dl, PtrVt,
2742 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2743 Callee = DAG.getLoad(
2744 PtrVt, dl, DAG.getEntryNode(), Callee,
2748 } else if (Subtarget->isTargetCOFF()) {
2749 assert(Subtarget->isTargetWindows() &&
2750 "Windows is the only supported COFF target");
2751 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2752 if (GVal->hasDLLImportStorageClass())
2753 TargetFlags = ARMII::MO_DLLIMPORT;
2754 else if (!TM.shouldAssumeDSOLocal(GVal))
2755 TargetFlags = ARMII::MO_COFFSTUB;
2756 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2757 TargetFlags);
2758 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2759 Callee =
2760 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2761 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2763 } else {
2764 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2765 }
2766 }
2767 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2768 isDirect = true;
2769 // tBX takes a register source operand.
2770 const char *Sym = S->getSymbol();
2771 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2772 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2775 ARMPCLabelIndex, 4);
2776 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2777 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2778 Callee = DAG.getLoad(
2779 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2781 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2782 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2783 } else {
2784 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2785 }
2786 }
2787
2788 if (isCmseNSCall) {
2789 assert(!isARMFunc && !isDirect &&
2790 "Cannot handle call to ARM function or direct call");
2791 if (NumBytes > 0) {
2793 "call to non-secure function would "
2794 "require passing arguments on stack",
2795 dl.getDebugLoc());
2796 DAG.getContext()->diagnose(Diag);
2797 }
2798 if (isStructRet) {
2801 "call to non-secure function would return value through pointer",
2802 dl.getDebugLoc());
2803 DAG.getContext()->diagnose(Diag);
2804 }
2805 }
2806
2807 // FIXME: handle tail calls differently.
2808 unsigned CallOpc;
2809 if (Subtarget->isThumb()) {
2810 if (GuardWithBTI)
2811 CallOpc = ARMISD::t2CALL_BTI;
2812 else if (isCmseNSCall)
2813 CallOpc = ARMISD::tSECALL;
2814 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2815 CallOpc = ARMISD::CALL_NOLINK;
2816 else
2817 CallOpc = ARMISD::CALL;
2818 } else {
2819 if (!isDirect && !Subtarget->hasV5TOps())
2820 CallOpc = ARMISD::CALL_NOLINK;
2821 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2822 // Emit regular call when code size is the priority
2823 !Subtarget->hasMinSize())
2824 // "mov lr, pc; b _foo" to avoid confusing the RSP
2825 CallOpc = ARMISD::CALL_NOLINK;
2826 else
2827 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2828 }
2829
2830 // We don't usually want to end the call-sequence here because we would tidy
2831 // the frame up *after* the call, however in the ABI-changing tail-call case
2832 // we've carefully laid out the parameters so that when sp is reset they'll be
2833 // in the correct location.
2834 if (isTailCall && !isSibCall) {
2835 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2836 InGlue = Chain.getValue(1);
2837 }
2838
2839 std::vector<SDValue> Ops;
2840 Ops.push_back(Chain);
2841 Ops.push_back(Callee);
2842
2843 if (isTailCall) {
2844 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2845 }
2846
2847 // Add argument registers to the end of the list so that they are known live
2848 // into the call.
2849 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2850 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2851 RegsToPass[i].second.getValueType()));
2852
2853 // Add a register mask operand representing the call-preserved registers.
2854 const uint32_t *Mask;
2855 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2856 if (isThisReturn) {
2857 // For 'this' returns, use the R0-preserving mask if applicable
2858 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2859 if (!Mask) {
2860 // Set isThisReturn to false if the calling convention is not one that
2861 // allows 'returned' to be modeled in this way, so LowerCallResult does
2862 // not try to pass 'this' straight through
2863 isThisReturn = false;
2864 Mask = ARI->getCallPreservedMask(MF, CallConv);
2865 }
2866 } else
2867 Mask = ARI->getCallPreservedMask(MF, CallConv);
2868
2869 assert(Mask && "Missing call preserved mask for calling convention");
2870 Ops.push_back(DAG.getRegisterMask(Mask));
2871
2872 if (InGlue.getNode())
2873 Ops.push_back(InGlue);
2874
2875 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2876 if (isTailCall) {
2878 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2879 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2880 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2881 return Ret;
2882 }
2883
2884 // Returns a chain and a flag for retval copy to use.
2885 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2886 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2887 InGlue = Chain.getValue(1);
2888 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2889
2890 // If we're guaranteeing tail-calls will be honoured, the callee must
2891 // pop its own argument stack on return. But this call is *not* a tail call so
2892 // we need to undo that after it returns to restore the status-quo.
2893 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2894 uint64_t CalleePopBytes =
2895 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2896
2897 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2898 if (!Ins.empty())
2899 InGlue = Chain.getValue(1);
2900
2901 // Handle result values, copying them out of physregs into vregs that we
2902 // return.
2903 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2904 InVals, isThisReturn,
2905 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
2906}
2907
2908/// HandleByVal - Every parameter *after* a byval parameter is passed
2909/// on the stack. Remember the next parameter register to allocate,
2910/// and then confiscate the rest of the parameter registers to ensure
2911/// this.
2912void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2913 Align Alignment) const {
2914 // Byval (as with any stack) slots are always at least 4 byte aligned.
2915 Alignment = std::max(Alignment, Align(4));
2916
2917 unsigned Reg = State->AllocateReg(GPRArgRegs);
2918 if (!Reg)
2919 return;
2920
2921 unsigned AlignInRegs = Alignment.value() / 4;
2922 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2923 for (unsigned i = 0; i < Waste; ++i)
2924 Reg = State->AllocateReg(GPRArgRegs);
2925
2926 if (!Reg)
2927 return;
2928
2929 unsigned Excess = 4 * (ARM::R4 - Reg);
2930
2931 // Special case when NSAA != SP and the parameter size is greater than the
2932 // size of all remaining GPR regs. In that case we can't split the parameter;
2933 // we must send it to the stack. We also must set NCRN to R4, wasting all
2934 // remaining registers.
2935 const unsigned NSAAOffset = State->getStackSize();
2936 if (NSAAOffset != 0 && Size > Excess) {
2937 while (State->AllocateReg(GPRArgRegs))
2938 ;
2939 return;
2940 }
2941
2942 // The first register for the byval parameter is the first register that
2943 // wasn't allocated before this method call, so it would be "reg".
2944 // If the parameter is small enough to be saved in the range [reg, r4), then
2945 // the end (first after last) register would be reg + param-size-in-regs;
2946 // otherwise the parameter would be split between registers and stack,
2947 // and the end register would be r4 in that case.
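 // Illustrative example (not from the source): with Size == 12, r2 as the first
 // free register and nothing on the stack yet (NSAAOffset == 0), Excess == 8,
 // so the range is [r2, r4): r2 and r3 carry the first 8 bytes, the remaining
 // 4 bytes go on the stack, and Size is truncated to 4 below.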
2948 unsigned ByValRegBegin = Reg;
2949 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2950 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2951 // Note, the first register was already allocated at the beginning of this
2952 // function; allocate the remaining registers we need.
2953 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2954 State->AllocateReg(GPRArgRegs);
2955 // A byval parameter that is split between registers and memory needs its
2956 // size truncated here.
2957 // In the case where the entire structure fits in registers, we set the
2958 // size in memory to zero.
2959 Size = std::max<int>(Size - Excess, 0);
2960}
2961
2962/// MatchingStackOffset - Return true if the given stack call argument is
2963/// already available in the same position (relatively) of the caller's
2964/// incoming argument stack.
2965static
2966bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2967 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2968 const TargetInstrInfo *TII) {
2969 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2970 int FI = std::numeric_limits<int>::max();
2971 if (Arg.getOpcode() == ISD::CopyFromReg) {
2972 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2973 if (!VR.isVirtual())
2974 return false;
2975 MachineInstr *Def = MRI->getVRegDef(VR);
2976 if (!Def)
2977 return false;
2978 if (!Flags.isByVal()) {
2979 if (!TII->isLoadFromStackSlot(*Def, FI))
2980 return false;
2981 } else {
2982 return false;
2983 }
2984 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2985 if (Flags.isByVal())
2986 // ByVal argument is passed in as a pointer but it's now being
2987 // dereferenced. e.g.
2988 // define @foo(%struct.X* %A) {
2989 // tail call @bar(%struct.X* byval %A)
2990 // }
2991 return false;
2992 SDValue Ptr = Ld->getBasePtr();
2993 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2994 if (!FINode)
2995 return false;
2996 FI = FINode->getIndex();
2997 } else
2998 return false;
2999
3000 assert(FI != std::numeric_limits<int>::max());
3001 if (!MFI.isFixedObjectIndex(FI))
3002 return false;
3003 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
3004}
3005
3006/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3007/// for tail call optimization. Targets which want to do tail call
3008/// optimization should implement this function. Note that this function also
3009/// processes musttail calls, so when this function returns false on a valid
3010/// musttail call, a fatal backend error occurs.
3011bool ARMTargetLowering::IsEligibleForTailCallOptimization(
3013 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
3014 CallingConv::ID CalleeCC = CLI.CallConv;
3015 SDValue Callee = CLI.Callee;
3016 bool isVarArg = CLI.IsVarArg;
3017 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3018 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3020 const SelectionDAG &DAG = CLI.DAG;
3022 const Function &CallerF = MF.getFunction();
3023 CallingConv::ID CallerCC = CallerF.getCallingConv();
3024
3025 assert(Subtarget->supportsTailCall());
3026
3027 // Indirect tail-calls require a register to hold the target address. That
3028 // register must be:
3029 // * Allocatable (i.e. r0-r7 if the target is Thumb1).
3030 // * Not callee-saved, so must be one of r0-r3 or r12.
3031 // * Not used to hold an argument to the tail-called function, which might be
3032 // in r0-r3.
3033 // * Not used to hold the return address authentication code, which is in r12
3034 // if enabled.
3035 // Sometimes, no register matches all of these conditions, so we can't do a
3036 // tail-call.
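 // Illustrative example (not from the source): an indirect tail call that
 // passes arguments in r0-r3 while r12 holds the return address authentication
 // code leaves no allocatable scratch register for the target address, so the
 // loop below rejects the tail call.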
3037 if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
3038 SmallSet<MCPhysReg, 5> AddressRegisters;
3039 for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
3040 AddressRegisters.insert(R);
3041 if (!(Subtarget->isThumb1Only() ||
3043 AddressRegisters.insert(ARM::R12);
3044 for (const CCValAssign &AL : ArgLocs)
3045 if (AL.isRegLoc())
3046 AddressRegisters.erase(AL.getLocReg());
3047 if (AddressRegisters.empty())
3048 return false;
3049 }
3050
3051 // Look for obvious safe cases to perform tail call optimization that do not
3052 // require ABI changes. This is what gcc calls sibcall.
3053
3054 // Exception-handling functions need a special set of instructions to indicate
3055 // a return to the hardware. Tail-calling another function would probably
3056 // break this.
3057 if (CallerF.hasFnAttribute("interrupt"))
3058 return false;
3059
3060 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3061 return CalleeCC == CallerCC;
3062
3063 // Also avoid sibcall optimization if either caller or callee uses struct
3064 // return semantics.
3065 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3066 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3067 if (isCalleeStructRet || isCallerStructRet)
3068 return false;
3069
3070 // Externally-defined functions with weak linkage should not be
3071 // tail-called on ARM when the OS does not support dynamic
3072 // pre-emption of symbols, as the AAELF spec requires normal calls
3073 // to undefined weak functions to be replaced with a NOP or jump to the
3074 // next instruction. The behaviour of branch instructions in this
3075 // situation (as used for tail calls) is implementation-defined, so we
3076 // cannot rely on the linker replacing the tail call with a return.
3077 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3078 const GlobalValue *GV = G->getGlobal();
3080 if (GV->hasExternalWeakLinkage() &&
3081 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3082 return false;
3083 }
3084
3085 // Check that the call results are passed in the same way.
3086 LLVMContext &C = *DAG.getContext();
3088 getEffectiveCallingConv(CalleeCC, isVarArg),
3089 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3090 CCAssignFnForReturn(CalleeCC, isVarArg),
3091 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3092 return false;
3093 // The callee has to preserve all registers the caller needs to preserve.
3094 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3095 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3096 if (CalleeCC != CallerCC) {
3097 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3098 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3099 return false;
3100 }
3101
3102 // If Caller's vararg or byval argument has been split between registers and
3103 // stack, do not perform tail call, since part of the argument is in caller's
3104 // local frame.
3105 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3106 if (AFI_Caller->getArgRegsSaveSize())
3107 return false;
3108
3109 // If the callee takes no arguments then go on to check the results of the
3110 // call.
3111 if (!Outs.empty()) {
3112 if (CCInfo.getStackSize()) {
3113 // Check if the arguments are already laid out in the right way as
3114 // the caller's fixed stack objects.
3115 MachineFrameInfo &MFI = MF.getFrameInfo();
3116 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3117 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3118 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3119 i != e;
3120 ++i, ++realArgIdx) {
3121 CCValAssign &VA = ArgLocs[i];
3122 EVT RegVT = VA.getLocVT();
3123 SDValue Arg = OutVals[realArgIdx];
3124 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3126 return false;
3127 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3128 // f64 and vector types are split into multiple registers or
3129 // register/stack-slot combinations. The types will not match
3130 // the registers; give up on memory f64 refs until we figure
3131 // out what to do about this.
3132 if (!VA.isRegLoc())
3133 return false;
3134 if (!ArgLocs[++i].isRegLoc())
3135 return false;
3136 if (RegVT == MVT::v2f64) {
3137 if (!ArgLocs[++i].isRegLoc())
3138 return false;
3139 if (!ArgLocs[++i].isRegLoc())
3140 return false;
3141 }
3142 } else if (!VA.isRegLoc()) {
3144 MFI, MRI, TII))
3145 return false;
3146 }
3147 }
3148 }
3149
3150 const MachineRegisterInfo &MRI = MF.getRegInfo();
3151 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3152 return false;
3153 }
3154
3155 return true;
3156}
3157
3158bool
3159ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3160 MachineFunction &MF, bool isVarArg,
3162 LLVMContext &Context) const {
3164 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3165 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3166}
3167
3168static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3169 const SDLoc &DL, SelectionDAG &DAG) {
3170 const MachineFunction &MF = DAG.getMachineFunction();
3171 const Function &F = MF.getFunction();
3172
3173 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3174
3175 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3176 // version of the "preferred return address". These offsets affect the return
3177 // instruction if this is a return from PL1 without hypervisor extensions.
3178 // IRQ/FIQ: +4 "subs pc, lr, #4"
3179 // SWI: 0 "subs pc, lr, #0"
3180 // ABORT: +4 "subs pc, lr, #4"
3181 // UNDEF: +4/+2 "subs pc, lr, #0"
3182 // UNDEF varies depending on whether the exception came from ARM or Thumb
3183 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
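 // Illustrative example (not from the source): a handler marked with the
 // "interrupt"="IRQ" attribute therefore returns with "subs pc, lr, #4", which
 // restores CPSR from SPSR and undoes the +4 LR adjustment made on exception
 // entry.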
3184
3185 int64_t LROffset;
3186 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3187 IntKind == "ABORT")
3188 LROffset = 4;
3189 else if (IntKind == "SWI" || IntKind == "UNDEF")
3190 LROffset = 0;
3191 else
3192 report_fatal_error("Unsupported interrupt attribute. If present, value "
3193 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3194
3195 RetOps.insert(RetOps.begin() + 1,
3196 DAG.getConstant(LROffset, DL, MVT::i32, false));
3197
3198 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3199}
3200
3201SDValue
3202ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3203 bool isVarArg,
3205 const SmallVectorImpl<SDValue> &OutVals,
3206 const SDLoc &dl, SelectionDAG &DAG) const {
3207 // CCValAssign - represent the assignment of the return value to a location.
3209
3210 // CCState - Info about the registers and stack slots.
3211 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3212 *DAG.getContext());
3213
3214 // Analyze outgoing return values.
3215 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3216
3217 SDValue Glue;
3219 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3220 bool isLittleEndian = Subtarget->isLittle();
3221
3224 AFI->setReturnRegsCount(RVLocs.size());
3225
3226 // Report error if cmse entry function returns structure through first ptr arg.
3227 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3228 // Note: using an empty SDLoc(), as the first line of the function is a
3229 // better place to report than the last line.
3232 "secure entry function would return value through pointer",
3233 SDLoc().getDebugLoc());
3234 DAG.getContext()->diagnose(Diag);
3235 }
3236
3237 // Copy the result values into the output registers.
3238 for (unsigned i = 0, realRVLocIdx = 0;
3239 i != RVLocs.size();
3240 ++i, ++realRVLocIdx) {
3241 CCValAssign &VA = RVLocs[i];
3242 assert(VA.isRegLoc() && "Can only return in registers!");
3243
3244 SDValue Arg = OutVals[realRVLocIdx];
3245 bool ReturnF16 = false;
3246
3247 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3248 // Half-precision return values can be returned like this:
3249 //
3250 // t11: f16 = fadd ...
3251 // t12: i16 = bitcast t11
3252 // t13: i32 = zero_extend t12
3253 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3254 //
3255 // to avoid code generation for bitcasts, we simply set Arg to the node
3256 // that produces the f16 value, t11 in this case.
3257 //
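 // Roughly, this matches IR such as "ret half %x" after type legalisation under
 // the hard-float ABI, where the value has been widened through i16/i32/f32
 // bitcasts (a sketch, not an exhaustive description of when it fires).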
3258 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3259 SDValue ZE = Arg.getOperand(0);
3260 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3261 SDValue BC = ZE.getOperand(0);
3262 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3263 Arg = BC.getOperand(0);
3264 ReturnF16 = true;
3265 }
3266 }
3267 }
3268 }
3269
3270 switch (VA.getLocInfo()) {
3271 default: llvm_unreachable("Unknown loc info!");
3272 case CCValAssign::Full: break;
3273 case CCValAssign::BCvt:
3274 if (!ReturnF16)
3275 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3276 break;
3277 }
3278
3279 // Mask f16 arguments if this is a CMSE nonsecure entry.
3280 auto RetVT = Outs[realRVLocIdx].ArgVT;
3281 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3282 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3283 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3284 } else {
3285 auto LocBits = VA.getLocVT().getSizeInBits();
3286 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3287 SDValue Mask =
3288 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3289 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3290 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3291 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3292 }
3293 }
3294
3295 if (VA.needsCustom() &&
3296 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3297 if (VA.getLocVT() == MVT::v2f64) {
3298 // Extract the first half and return it in two registers.
3299 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3300 DAG.getConstant(0, dl, MVT::i32));
3301 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3302 DAG.getVTList(MVT::i32, MVT::i32), Half);
3303
3304 Chain =
3305 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3306 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3307 Glue = Chain.getValue(1);
3308 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3309 VA = RVLocs[++i]; // skip ahead to next loc
3310 Chain =
3311 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3312 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3313 Glue = Chain.getValue(1);
3314 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3315 VA = RVLocs[++i]; // skip ahead to next loc
3316
3317 // Extract the 2nd half and fall through to handle it as an f64 value.
3318 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3319 DAG.getConstant(1, dl, MVT::i32));
3320 }
3321 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3322 // available.
3323 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3324 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3325 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3326 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3327 Glue = Chain.getValue(1);
3328 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3329 VA = RVLocs[++i]; // skip ahead to next loc
3330 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3331 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3332 } else
3333 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3334
3335 // Guarantee that all emitted copies are glued together, so nothing else
3336 // can be scheduled in between them.
3337 Glue = Chain.getValue(1);
3338 RetOps.push_back(DAG.getRegister(
3339 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3340 }
3341 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3342 const MCPhysReg *I =
3343 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3344 if (I) {
3345 for (; *I; ++I) {
3346 if (ARM::GPRRegClass.contains(*I))
3347 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3348 else if (ARM::DPRRegClass.contains(*I))
3350 else
3351 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3352 }
3353 }
3354
3355 // Update chain and glue.
3356 RetOps[0] = Chain;
3357 if (Glue.getNode())
3358 RetOps.push_back(Glue);
3359
3360 // CPUs which aren't M-class use a special sequence to return from
3361 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3362 // though we use "subs pc, lr, #N").
3363 //
3364 // M-class CPUs actually use a normal return sequence with a special
3365 // (hardware-provided) value in LR, so the normal code path works.
3366 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3367 !Subtarget->isMClass()) {
3368 if (Subtarget->isThumb1Only())
3369 report_fatal_error("interrupt attribute is not supported in Thumb1");
3370 return LowerInterruptReturn(RetOps, dl, DAG);
3371 }
3372
3375 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3376}
3377
3378bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3379 if (N->getNumValues() != 1)
3380 return false;
3381 if (!N->hasNUsesOfValue(1, 0))
3382 return false;
3383
3384 SDValue TCChain = Chain;
3385 SDNode *Copy = *N->use_begin();
3386 if (Copy->getOpcode() == ISD::CopyToReg) {
3387 // If the copy has a glue operand, we conservatively assume it isn't safe to
3388 // perform a tail call.
3389 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3390 return false;
3391 TCChain = Copy->getOperand(0);
3392 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3393 SDNode *VMov = Copy;
3394 // f64 returned in a pair of GPRs.
3396 for (SDNode *U : VMov->uses()) {
3397 if (U->getOpcode() != ISD::CopyToReg)
3398 return false;
3399 Copies.insert(U);
3400 }
3401 if (Copies.size() > 2)
3402 return false;
3403
3404 for (SDNode *U : VMov->uses()) {
3405 SDValue UseChain = U->getOperand(0);
3406 if (Copies.count(UseChain.getNode()))
3407 // Second CopyToReg
3408 Copy = U;
3409 else {
3410 // We are at the top of this chain.
3411 // If the copy has a glue operand, we conservatively assume it
3412 // isn't safe to perform a tail call.
3413 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3414 return false;
3415 // First CopyToReg
3416 TCChain = UseChain;
3417 }
3418 }
3419 } else if (Copy->getOpcode() == ISD::BITCAST) {
3420 // f32 returned in a single GPR.
3421 if (!Copy->hasOneUse())
3422 return false;
3423 Copy = *Copy->use_begin();
3424 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3425 return false;
3426 // If the copy has a glue operand, we conservatively assume it isn't safe to
3427 // perform a tail call.
3428 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3429 return false;
3430 TCChain = Copy->getOperand(0);
3431 } else {
3432 return false;
3433 }
3434
3435 bool HasRet = false;
3436 for (const SDNode *U : Copy->uses()) {
3437 if (U->getOpcode() != ARMISD::RET_GLUE &&
3438 U->getOpcode() != ARMISD::INTRET_GLUE)
3439 return false;
3440 HasRet = true;
3441 }
3442
3443 if (!HasRet)
3444 return false;
3445
3446 Chain = TCChain;
3447 return true;
3448}
3449
3450bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3451 if (!Subtarget->supportsTailCall())
3452 return false;
3453
3454 if (!CI->isTailCall())
3455 return false;
3456
3457 return true;
3458}
3459
3460// Trying to write a 64-bit value, so we need to split it into two 32-bit values
3461// first and pass the low and high parts through.
3462static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
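 // Illustrative example (not from the source): an IR call such as
 //   call void @llvm.write_register.i64(metadata !0, i64 %v)
 // arrives here as an i64 WRITE_REGISTER node and is re-emitted with its value
 // operand split into a (lo, hi) pair of i32 operands.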
3463 SDLoc DL(Op);
3464 SDValue WriteValue = Op->getOperand(2);
3465
3466 // This function is only supposed to be called for i64 type argument.
3467 assert(WriteValue.getValueType() == MVT::i64
3468 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3469
3470 SDValue Lo, Hi;
3471 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3472 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3473 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3474}
3475
3476// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3477// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3478// one of the above mentioned nodes. It has to be wrapped because otherwise
3479// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3480// be used to form addressing mode. These wrapped nodes will be selected
3481// into MOVi.
3482SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3483 SelectionDAG &DAG) const {
3484 EVT PtrVT = Op.getValueType();
3485 // FIXME there is no actual debug info here
3486 SDLoc dl(Op);
3487 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3488 SDValue Res;
3489
3490 // When generating execute-only code Constant Pools must be promoted to the
3491 // global data section. It's a bit ugly that we can't share them across basic
3492 // blocks, but this way we guarantee that execute-only behaves correctly with
3493 // position-independent addressing modes.
3494 if (Subtarget->genExecuteOnly()) {
3495 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3496 auto T = const_cast<Type*>(CP->getType());
3497 auto C = const_cast<Constant*>(CP->getConstVal());
3498 auto M = const_cast<Module*>(DAG.getMachineFunction().
3500 auto GV = new GlobalVariable(
3501 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3504 Twine(AFI->createPICLabelUId())
3505 );
3506 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3507 dl, PtrVT);
3508 return LowerGlobalAddress(GA, DAG);
3509 }
3510
3511 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3512 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3513 Align CPAlign = CP->getAlign();
3514 if (Subtarget->isThumb1Only())
3515 CPAlign = std::max(CPAlign, Align(4));
3516 if (CP->isMachineConstantPoolEntry())
3517 Res =
3518 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3519 else
3520 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3521 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3522}
3523
3525 // If we don't have a 32-bit pc-relative branch instruction then the jump
3526 // table consists of block addresses. Usually this is inline, but for
3527 // execute-only it must be placed out-of-line.
3528 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3531}
3532
3533SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3534 SelectionDAG &DAG) const {
3537 unsigned ARMPCLabelIndex = 0;
3538 SDLoc DL(Op);
3539 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3540 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3541 SDValue CPAddr;
3542 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3543 if (!IsPositionIndependent) {
3544 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3545 } else {
3546 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3547 ARMPCLabelIndex = AFI->createPICLabelUId();
3549 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3550 ARMCP::CPBlockAddress, PCAdj);
3551 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3552 }
3553 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3554 SDValue Result = DAG.getLoad(
3555 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3557 if (!IsPositionIndependent)
3558 return Result;
3559 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3560 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3561}
3562
3563/// Convert a TLS address reference into the correct sequence of loads
3564/// and calls to compute the variable's address for Darwin, and return an
3565/// SDValue containing the final node.
3566
3567/// Darwin only has one TLS scheme which must be capable of dealing with the
3568/// fully general situation, in the worst case. This means:
3569/// + "extern __thread" declaration.
3570/// + Defined in a possibly unknown dynamic library.
3571///
3572/// The general system is that each __thread variable has a [3 x i32] descriptor
3573/// which contains information used by the runtime to calculate the address. The
3574/// only part of this the compiler needs to know about is the first word, which
3575/// contains a function pointer that must be called with the address of the
3576/// entire descriptor in "r0".
3577///
3578/// Since this descriptor may be in a different unit, in general access must
3579/// proceed along the usual ARM rules. A common sequence to produce is:
3580///
3581/// movw rT1, :lower16:_var$non_lazy_ptr
3582/// movt rT1, :upper16:_var$non_lazy_ptr
3583/// ldr r0, [rT1]
3584/// ldr rT2, [r0]
3585/// blx rT2
3586/// [...address now in r0...]
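/// The lowering below materialises exactly this pattern: it loads the function
/// pointer from the first word of the descriptor, then emits an ARMISD::CALL
/// with r0 holding the descriptor address and a register mask that preserves
/// everything except r0, lr and cpsr (see the comment in the function body).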
3587SDValue
3588ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3589 SelectionDAG &DAG) const {
3590 assert(Subtarget->isTargetDarwin() &&
3591 "This function expects a Darwin target");
3592 SDLoc DL(Op);
3593
3594 // The first step is to get the address of the actual global symbol. This is where
3595 // the TLS descriptor lives.
3596 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3597
3598 // The first entry in the descriptor is a function pointer that we must call
3599 // to obtain the address of the variable.
3600 SDValue Chain = DAG.getEntryNode();
3601 SDValue FuncTLVGet = DAG.getLoad(
3602 MVT::i32, DL, Chain, DescAddr,
3606 Chain = FuncTLVGet.getValue(1);
3607
3609 MachineFrameInfo &MFI = F.getFrameInfo();
3610 MFI.setAdjustsStack(true);
3611
3612 // TLS calls preserve all registers except those that absolutely must be
3613 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3614 // silly).
3615 auto TRI =
3617 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3619
3620 // Finally, we can make the call. This is just a degenerate version of a
3621 // normal ARM call node: r0 takes the address of the descriptor, and
3622 // returns the address of the variable in this thread.
3623 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3624 Chain =
3625 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3626 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3627 DAG.getRegisterMask(Mask), Chain.getValue(1));
3628 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3629}
3630
3631SDValue
3632ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3633 SelectionDAG &DAG) const {
3634 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3635
3636 SDValue Chain = DAG.getEntryNode();
3637 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3638 SDLoc DL(Op);
3639
3640 // Load the current TEB (thread environment block)
3641 SDValue Ops[] = {Chain,
3642 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3643 DAG.getTargetConstant(15, DL, MVT::i32),
3644 DAG.getTargetConstant(0, DL, MVT::i32),
3645 DAG.getTargetConstant(13, DL, MVT::i32),
3646 DAG.getTargetConstant(0, DL, MVT::i32),
3647 DAG.getTargetConstant(2, DL, MVT::i32)};
3648 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3649 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3650
3651 SDValue TEB = CurrentTEB.getValue(0);
3652 Chain = CurrentTEB.getValue(1);
3653
3654 // Load the ThreadLocalStoragePointer from the TEB
3655 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3656 SDValue TLSArray =
3657 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3658 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3659
3660 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3661 // offset into the TLSArray.
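 // In effect (illustrative pseudo-code, not from the source):
 //   TlsBase = *(*(TEB + 0x2c) + _tls_index * 4);
 //   VarAddr = TlsBase + SECREL(GV);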
3662
3663 // Load the TLS index from the C runtime
3664 SDValue TLSIndex =
3665 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3666 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3667 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3668
3669 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3670 DAG.getConstant(2, DL, MVT::i32));
3671 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3672 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3674
3675 // Get the offset of the start of the .tls section (section base)
3676 const auto *GA = cast<GlobalAddressSDNode>(Op);
3677 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3678 SDValue Offset = DAG.getLoad(
3679 PtrVT, DL, Chain,
3680 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3681 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3683
3684 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3685}
3686
3687// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3688SDValue
3689ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3690 SelectionDAG &DAG) const {
3691 SDLoc dl(GA);
3692 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3693 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3696 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3698 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3699 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3700 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3701 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3702 Argument = DAG.getLoad(
3703 PtrVT, dl, DAG.getEntryNode(), Argument,
3705 SDValue Chain = Argument.getValue(1);
3706
3707 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3708 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3709
3710 // call __tls_get_addr.
3712 ArgListEntry Entry;
3713 Entry.Node = Argument;
3714 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3715 Args.push_back(Entry);
3716
3717 // FIXME: is there useful debug info available here?
3719 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3721 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3722
3723 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3724 return CallResult.first;
3725}
3726
3727// Lower ISD::GlobalTLSAddress using the "initial exec" or
3728// "local exec" model.
3729SDValue
3730ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3731 SelectionDAG &DAG,
3732 TLSModel::Model model) const {
3733 const GlobalValue *GV = GA->getGlobal();
3734 SDLoc dl(GA);
3736 SDValue Chain = DAG.getEntryNode();
3737 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3738 // Get the Thread Pointer
3740
3741 if (model == TLSModel::InitialExec) {
3744 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3745 // Initial exec model.
3746 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3748 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3750 true);
3751 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3752 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3753 Offset = DAG.getLoad(
3754 PtrVT, dl, Chain, Offset,
3756 Chain = Offset.getValue(1);
3757
3758 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3759 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3760
3761 Offset = DAG.getLoad(
3762 PtrVT, dl, Chain, Offset,
3764 } else {
3765 // local exec model
3766 assert(model == TLSModel::LocalExec);
3769 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3770 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3771 Offset = DAG.getLoad(
3772 PtrVT, dl, Chain, Offset,
3774 }
3775
3776 // The address of the thread local variable is the add of the thread
3777 // pointer with the offset of the variable.
3778 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3779}
3780
3781SDValue
3782ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3783 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3784 if (DAG.getTarget().useEmulatedTLS())
3785 return LowerToTLSEmulatedModel(GA, DAG);
3786
3787 if (Subtarget->isTargetDarwin())
3788 return LowerGlobalTLSAddressDarwin(Op, DAG);
3789
3790 if (Subtarget->isTargetWindows())
3791 return LowerGlobalTLSAddressWindows(Op, DAG);
3792
3793 // TODO: implement the "local dynamic" model
3794 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3796
3797 switch (model) {
3800 return LowerToTLSGeneralDynamicModel(GA, DAG);
3803 return LowerToTLSExecModels(GA, DAG, model);
3804 }
3805 llvm_unreachable("bogus TLS model");
3806}
3807
3808/// Return true if all users of V are within function F, looking through
3809/// ConstantExprs.
3810static bool allUsersAreInFunction(const Value *V, const Function *F) {
3811 SmallVector<const User*,4> Worklist(V->users());
3812 while (!Worklist.empty()) {
3813 auto *U = Worklist.pop_back_val();
3814 if (isa<ConstantExpr>(U)) {
3815 append_range(Worklist, U->users());
3816 continue;
3817 }
3818
3819 auto *I = dyn_cast<Instruction>(U);
3820 if (!I || I->getParent()->getParent() != F)
3821 return false;
3822 }
3823 return true;
3824}
3825
3827 const GlobalValue *GV, SelectionDAG &DAG,
3828 EVT PtrVT, const SDLoc &dl) {
3829 // If we're creating a pool entry for a constant global with unnamed address,
3830 // and the global is small enough, we can emit it inline into the constant pool
3831 // to save ourselves an indirection.
3832 //
3833 // This is a win if the constant is only used in one function (so it doesn't
3834 // need to be duplicated) or duplicating the constant wouldn't increase code
3835 // size (implying the constant is no larger than 4 bytes).
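 // Illustrative example (not from the source): a small local string such as
 //   @.str = private unnamed_addr constant [4 x i8] c"abc\00"
 // that is only referenced from one function can be placed directly in that
 // function's constant pool, so its address is formed pc-relative instead of
 // being loaded through a separate pointer in the pool.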
3836 const Function &F = DAG.getMachineFunction().getFunction();
3837
3838 // We rely on this decision to inline being idempotent and unrelated to the
3839 // use-site. We know that if we inline a variable at one use site, we'll
3840 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3841 // doesn't know about this optimization, so bail out if it's enabled else
3842 // we could decide to inline here (and thus never emit the GV) but require
3843 // the GV from fast-isel generated code.
3846 return SDValue();
3847
3848 auto *GVar = dyn_cast<GlobalVariable>(GV);
3849 if (!GVar || !GVar->hasInitializer() ||
3850 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3851 !GVar->hasLocalLinkage())
3852 return SDValue();
3853
3854 // If we inline a value that contains relocations, we move the relocations
3855 // from .data to .text. This is not allowed in position-independent code.
3856 auto *Init = GVar->getInitializer();
3857 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3858 Init->needsDynamicRelocation())
3859 return SDValue();
3860
3861 // The constant islands pass can only really deal with alignment requests
3862 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3863 // any type wanting greater alignment requirements than 4 bytes. We also
3864 // can only promote constants that are multiples of 4 bytes in size or
3865 // are paddable to a multiple of 4. Currently we only try to pad constants
3866 // that are strings for simplicity.
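// Illustrative example (assumed sizes): a 6-byte string such as "hello"
// (five characters plus the NUL terminator) would get two extra zero bytes of
// padding so its constant-pool entry is a multiple of 4 bytes.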
3867 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3868 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3869 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3870 unsigned RequiredPadding = 4 - (Size % 4);
3871 bool PaddingPossible =
3872 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3873 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3874 Size == 0)
3875 return SDValue();
3876
3877 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3880
3881 // We can't bloat the constant pool too much, else the ConstantIslands pass
3882 // may fail to converge. If we haven't promoted this global yet (it may have
3883 // multiple uses), and promoting it would increase the constant pool size
3884 // (Size > 4), ensure we have space to do so up to MaxTotal.
3885 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3886 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3888 return SDValue();
3889
3890 // This is only valid if all users are in a single function; we can't clone
3891 // the constant in general. The LLVM IR unnamed_addr allows merging
3892 // constants, but not cloning them.
3893 //
3894 // We could potentially allow cloning if we could prove all uses of the
3895 // constant in the current function don't care about the address, like
3896 // printf format strings. But that isn't implemented for now.
3897 if (!allUsersAreInFunction(GVar, &F))
3898 return SDValue();
3899
3900 // We're going to inline this global. Pad it out if needed.
3901 if (RequiredPadding != 4) {
3902 StringRef S = CDAInit->getAsString();
3903
3905 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3906 while (RequiredPadding--)
3907 V.push_back(0);
3909 }
3910
3911 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3912 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3913 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3916 PaddedSize - 4);
3917 }
3918 ++NumConstpoolPromoted;
3919 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3920}
3921
3923 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3924 if (!(GV = GA->getAliaseeObject()))
3925 return false;
3926 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3927 return V->isConstant();
3928 return isa<Function>(GV);
3929}
3930
3931SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3932 SelectionDAG &DAG) const {
3933 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3934 default: llvm_unreachable("unknown object format");
3935 case Triple::COFF:
3936 return LowerGlobalAddressWindows(Op, DAG);
3937 case Triple::ELF:
3938 return LowerGlobalAddressELF(Op, DAG);
3939 case Triple::MachO:
3940 return LowerGlobalAddressDarwin(Op, DAG);
3941 }
3942}
3943
3944SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3945 SelectionDAG &DAG) const {
3946 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3947 SDLoc dl(Op);
3948 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3949 bool IsRO = isReadOnly(GV);
3950
3951 // promoteToConstantPool only if not generating XO text section
3952 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3953 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3954 return V;
3955
3956 if (isPositionIndependent()) {
3958 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3959 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3960 if (!GV->isDSOLocal())
3961 Result =
3962 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3964 return Result;
3965 } else if (Subtarget->isROPI() && IsRO) {
3966 // PC-relative.
3967 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3968 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3969 return Result;
3970 } else if (Subtarget->isRWPI() && !IsRO) {
3971 // SB-relative.
3972 SDValue RelAddr;
3973 if (Subtarget->useMovt()) {
3974 ++NumMovwMovt;
3975 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3976 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3977 } else { // use literal pool for address constant
3980 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3981 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3982 RelAddr = DAG.getLoad(
3983 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3985 }
3986 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3987 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3988 return Result;
3989 }
3990
3991 // If we have T2 ops, we can materialize the address directly via movt/movw
3992 // pair. This is always cheaper. If we need to generate Execute Only code, and
3993 // we only have Thumb1 available, we can't use a constant pool and are forced to
3994 // use immediate relocations.
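// For illustration (the usual expansion, shown as a sketch): with movw/movt
// available, the address of a global "g" is typically materialized as
//   movw r0, :lower16:g
//   movt r0, :upper16:g
// rather than being loaded from a literal pool.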
3995 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3996 if (Subtarget->useMovt())
3997 ++NumMovwMovt;
3998 // FIXME: Once remat is capable of dealing with instructions with register
3999 // operands, expand this into two nodes.
4000 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
4001 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
4002 } else {
4003 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
4004 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4005 return DAG.getLoad(
4006 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4008 }
4009}
4010
4011SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
4012 SelectionDAG &DAG) const {
4013 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4014 "ROPI/RWPI not currently supported for Darwin");
4015 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4016 SDLoc dl(Op);
4017 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4018
4019 if (Subtarget->useMovt())
4020 ++NumMovwMovt;
4021
4022 // FIXME: Once remat is capable of dealing with instructions with register
4023 // operands, expand this into multiple nodes
4024 unsigned Wrapper =
4026
4027 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
4028 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
4029
4030 if (Subtarget->isGVIndirectSymbol(GV))
4031 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4033 return Result;
4034}
4035
4036SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4037 SelectionDAG &DAG) const {
4038 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4039 assert(Subtarget->useMovt() &&
4040 "Windows on ARM expects to use movw/movt");
4041 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4042 "ROPI/RWPI not currently supported for Windows");
4043
4045 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4046 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4047 if (GV->hasDLLImportStorageClass())
4048 TargetFlags = ARMII::MO_DLLIMPORT;
4049 else if (!TM.shouldAssumeDSOLocal(GV))
4050 TargetFlags = ARMII::MO_COFFSTUB;
4051 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4053 SDLoc DL(Op);
4054
4055 ++NumMovwMovt;
4056
4057 // FIXME: Once remat is capable of dealing with instructions with register
4058 // operands, expand this into two nodes.
4059 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4060 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4061 TargetFlags));
4062 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4063 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4065 return Result;
4066}
4067
4068SDValue
4069ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4070 SDLoc dl(Op);
4071 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4072 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4073 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4074 Op.getOperand(1), Val);
4075}
4076
4077SDValue
4078ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4079 SDLoc dl(Op);
4080 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4081 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4082}
4083
4084SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4085 SelectionDAG &DAG) const {
4086 SDLoc dl(Op);
4087 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4088 Op.getOperand(0));
4089}
4090
4091SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4092 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4093 unsigned IntNo =
4094 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4095 switch (IntNo) {
4096 default:
4097 return SDValue(); // Don't custom lower most intrinsics.
4098 case Intrinsic::arm_gnu_eabi_mcount: {
4100 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4101 SDLoc dl(Op);
4102 SDValue Chain = Op.getOperand(0);
4103 // call "\01__gnu_mcount_nc"
4104 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4105 const uint32_t *Mask =
4107 assert(Mask && "Missing call preserved mask for calling convention");
4108 // Mark LR an implicit live-in.
4109 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4110 SDValue ReturnAddress =
4111 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4112 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4113 SDValue Callee =
4114 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4116 if (Subtarget->isThumb())
4117 return SDValue(
4118 DAG.getMachineNode(
4119 ARM::tBL_PUSHLR, dl, ResultTys,
4120 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4121 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4122 0);
4123 return SDValue(
4124 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4125 {ReturnAddress, Callee, RegisterMask, Chain}),
4126 0);
4127 }
4128 }
4129}
4130
4131SDValue
4132ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4133 const ARMSubtarget *Subtarget) const {
4134 unsigned IntNo = Op.getConstantOperandVal(0);
4135 SDLoc dl(Op);
4136 switch (IntNo) {
4137 default: return SDValue(); // Don't custom lower most intrinsics.
4138 case Intrinsic::thread_pointer: {
4139 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4140 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4141 }
4142 case Intrinsic::arm_cls: {
4143 const SDValue &Operand = Op.getOperand(1);
4144 const EVT VTy = Op.getValueType();
4145 SDValue SRA =
4146 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4147 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4148 SDValue SHL =
4149 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4150 SDValue OR =
4151 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4152 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4153 return Result;
4154 }
4155 case Intrinsic::arm_cls64: {
4156 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4157 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
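// Worked example (illustrative): for x = 0x0000000180000000, hi(x) = 1, so
// cls(hi) = 30 != 31 and the result is 30. For x = 1, hi(x) = 0, so
// cls(hi) = 31 and the result is 31 + clz(lo) = 31 + 31 = 62.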
4158 const SDValue &Operand = Op.getOperand(1);
4159 const EVT VTy = Op.getValueType();
4160 SDValue Lo, Hi;
4161 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4162 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4163 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4164 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4165 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4166 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4167 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4168 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4169 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4170 SDValue CheckLo =
4171 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4172 SDValue HiIsZero =
4173 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4174 SDValue AdjustedLo =
4175 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4176 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4177 SDValue Result =
4178 DAG.getSelect(dl, VTy, CheckLo,
4179 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4180 return Result;
4181 }
4182 case Intrinsic::eh_sjlj_lsda: {
4185 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4186 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4187 SDValue CPAddr;
4188 bool IsPositionIndependent = isPositionIndependent();
4189 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4191 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4192 ARMCP::CPLSDA, PCAdj);
4193 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4194 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4195 SDValue Result = DAG.getLoad(
4196 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4198
4199 if (IsPositionIndependent) {
4200 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4201 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4202 }
4203 return Result;
4204 }
4205 case Intrinsic::arm_neon_vabs:
4206 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4207 Op.getOperand(1));
4208 case Intrinsic::arm_neon_vabds:
4209 if (Op.getValueType().isInteger())
4210 return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
4211 Op.getOperand(1), Op.getOperand(2));
4212 return SDValue();
4213 case Intrinsic::arm_neon_vabdu:
4214 return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
4215 Op.getOperand(1), Op.getOperand(2));
4216 case Intrinsic::arm_neon_vmulls:
4217 case Intrinsic::arm_neon_vmullu: {
4218 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4220 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4221 Op.getOperand(1), Op.getOperand(2));
4222 }
4223 case Intrinsic::arm_neon_vminnm:
4224 case Intrinsic::arm_neon_vmaxnm: {
4225 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4227 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4228 Op.getOperand(1), Op.getOperand(2));
4229 }
4230 case Intrinsic::arm_neon_vminu:
4231 case Intrinsic::arm_neon_vmaxu: {
4232 if (Op.getValueType().isFloatingPoint())
4233 return SDValue();
4234 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4235 ? ISD::UMIN : ISD::UMAX;
4236 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4237 Op.getOperand(1), Op.getOperand(2));
4238 }
4239 case Intrinsic::arm_neon_vmins:
4240 case Intrinsic::arm_neon_vmaxs: {
4241 // v{min,max}s is overloaded between signed integers and floats.
4242 if (!Op.getValueType().isFloatingPoint()) {
4243 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4244 ? ISD::SMIN : ISD::SMAX;
4245 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4246 Op.getOperand(1), Op.getOperand(2));
4247 }
4248 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4250 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4251 Op.getOperand(1), Op.getOperand(2));
4252 }
4253 case Intrinsic::arm_neon_vtbl1:
4254 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4255 Op.getOperand(1), Op.getOperand(2));
4256 case Intrinsic::arm_neon_vtbl2:
4257 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4258 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4259 case Intrinsic::arm_mve_pred_i2v:
4260 case Intrinsic::arm_mve_pred_v2i:
4261 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4262 Op.getOperand(1));
4263 case Intrinsic::arm_mve_vreinterpretq:
4264 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4265 Op.getOperand(1));
4266 case Intrinsic::arm_mve_lsll:
4267 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4268 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4269 case Intrinsic::arm_mve_asrl:
4270 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4271 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4272 }
4273}
4274
4276 const ARMSubtarget *Subtarget) {
4277 SDLoc dl(Op);
4278 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4279 if (SSID == SyncScope::SingleThread)
4280 return Op;
4281
4282 if (!Subtarget->hasDataBarrier()) {
4283 // Some ARMv6 CPUs can support data barriers with an mcr instruction.
4284 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4285 // here.
4286 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4287 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4288 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4289 DAG.getConstant(0, dl, MVT::i32));
4290 }
4291
4292 AtomicOrdering Ord =
4293 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4295 if (Subtarget->isMClass()) {
4296 // Only a full system barrier exists in the M-class architectures.
4298 } else if (Subtarget->preferISHSTBarriers() &&
4299 Ord == AtomicOrdering::Release) {
4300 // Swift happens to implement ISHST barriers in a way that's compatible with
4301 // Release semantics but weaker than ISH so we'd be fools not to use
4302 // it. Beware: other processors probably don't!
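// For illustration (assumed typical output): the node below becomes a DMB
// with the chosen domain, e.g. "dmb ish" for an ordinary fence, "dmb sy" on
// M-class, or "dmb ishst" for the Swift release case described above.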
4304 }
4305
4306 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4307 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4308 DAG.getConstant(Domain, dl, MVT::i32));
4309}
4310
4312 const ARMSubtarget *Subtarget) {
4313 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4314 if (!(Subtarget->isThumb2() ||
4315 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4316 // Just preserve the chain.
4317 return Op.getOperand(0);
4318
4319 SDLoc dl(Op);
4320 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4321 if (!isRead &&
4322 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4323 // ARMv7 with MP extension has PLDW.
4324 return Op.getOperand(0);
4325
4326 unsigned isData = Op.getConstantOperandVal(4);
4327 if (Subtarget->isThumb()) {
4328 // Invert the bits.
4329 isRead = ~isRead & 1;
4330 isData = ~isData & 1;
4331 }
4332
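// For example (illustrative): a plain __builtin_prefetch(p) is a data read
// and typically becomes "pld [r0]", a write prefetch on v7+MP becomes
// "pldw [r0]", and an instruction prefetch becomes "pli [r0]".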
4333 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4334 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4335 DAG.getConstant(isData, dl, MVT::i32));
4336}
4337
4340 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4341
4342 // vastart just stores the address of the VarArgsFrameIndex slot into the
4343 // memory location argument.
4344 SDLoc dl(Op);
4346 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4347 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4348 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4349 MachinePointerInfo(SV));
4350}
4351
4352SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4353 CCValAssign &NextVA,
4354 SDValue &Root,
4355 SelectionDAG &DAG,
4356 const SDLoc &dl) const {
4359
4360 const TargetRegisterClass *RC;
4361 if (AFI->isThumb1OnlyFunction())
4362 RC = &ARM::tGPRRegClass;
4363 else
4364 RC = &ARM::GPRRegClass;
4365
4366 // Transform the arguments stored in physical registers into virtual ones.
4367 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4368 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4369
4370 SDValue ArgValue2;
4371 if (NextVA.isMemLoc()) {
4372 MachineFrameInfo &MFI = MF.getFrameInfo();
4373 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4374
4375 // Create load node to retrieve arguments from the stack.
4376 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4377 ArgValue2 = DAG.getLoad(
4378 MVT::i32, dl, Root, FIN,
4380 } else {
4381 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4382 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4383 }
4384 if (!Subtarget->isLittle())
4385 std::swap (ArgValue, ArgValue2);
4386 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4387}
4388
4389// The remaining GPRs hold either the beginning of variable-argument
4390// data, or the beginning of an aggregate passed by value (usually
4391// byval). Either way, we allocate stack slots adjacent to the data
4392// provided by our caller, and store the unallocated registers there.
4393// If this is a variadic function, the va_list pointer will begin with
4394// these values; otherwise, this reassembles a (byval) structure that
4395// was split between registers and memory.
4396 // Return: The frame index that the registers were stored into.
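// For example (illustrative): in "int f(int a, ...)" only r0 carries a named
// argument, so r1-r3 are spilled to stack slots immediately below the
// caller's argument area; the variadic arguments that arrived in registers
// then sit contiguously with those passed on the stack and va_arg can walk
// them with a single pointer.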
4397int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4398 const SDLoc &dl, SDValue &Chain,
4399 const Value *OrigArg,
4400 unsigned InRegsParamRecordIdx,
4401 int ArgOffset, unsigned ArgSize) const {
4402 // Currently, two use-cases are possible:
4403 // Case #1. Non-var-args function, and we meet first byval parameter.
4404 // Setup first unallocated register as first byval register;
4405 // eat all remaining registers
4406 // (these two actions are performed by HandleByVal method).
4407 // Then, here, we initialize stack frame with
4408 // "store-reg" instructions.
4409 // Case #2. Var-args function that doesn't contain byval parameters.
4410 // The same: eat all remaining unallocated registers,
4411 // initialize stack frame.
4412
4414 MachineFrameInfo &MFI = MF.getFrameInfo();
4416 unsigned RBegin, REnd;
4417 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4418 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4419 } else {
4420 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4421 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4422 REnd = ARM::R4;
4423 }
4424
4425 if (REnd != RBegin)
4426 ArgOffset = -4 * (ARM::R4 - RBegin);
4427
4428 auto PtrVT = getPointerTy(DAG.getDataLayout());
4429 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4430 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4431
4433 const TargetRegisterClass *RC =
4434 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4435
4436 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4437 Register VReg = MF.addLiveIn(Reg, RC);
4438 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4439 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4440 MachinePointerInfo(OrigArg, 4 * i));
4441 MemOps.push_back(Store);
4442 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4443 }
4444
4445 if (!MemOps.empty())
4446 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4447 return FrameIndex;
4448}
4449
4450 // Set up the stack frame that the va_list pointer will start from.
4451void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4452 const SDLoc &dl, SDValue &Chain,
4453 unsigned ArgOffset,
4454 unsigned TotalArgRegsSaveSize,
4455 bool ForceMutable) const {
4458
4459 // Try to store any remaining integer argument regs
4460 // to their spots on the stack so that they may be loaded by dereferencing
4461 // the result of va_next.
4462 // If there are no regs to be stored, just point the address after the last
4463 // argument passed via the stack.
4464 int FrameIndex = StoreByValRegs(
4465 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4466 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4467 AFI->setVarArgsFrameIndex(FrameIndex);
4468}
4469
4470bool ARMTargetLowering::splitValueIntoRegisterParts(
4471 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4472 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4473 EVT ValueVT = Val.getValueType();
4474 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4475 unsigned ValueBits = ValueVT.getSizeInBits();
4476 unsigned PartBits = PartVT.getSizeInBits();
4477 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4478 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4479 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4480 Parts[0] = Val;
4481 return true;
4482 }
4483 return false;
4484}
4485
4486SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4487 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4488 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4489 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4490 unsigned ValueBits = ValueVT.getSizeInBits();
4491 unsigned PartBits = PartVT.getSizeInBits();
4492 SDValue Val = Parts[0];
4493
4494 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4495 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4496 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4497 return Val;
4498 }
4499 return SDValue();
4500}
4501
4502SDValue ARMTargetLowering::LowerFormalArguments(
4503 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4504 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4505 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4507 MachineFrameInfo &MFI = MF.getFrameInfo();
4508
4510
4511 // Assign locations to all of the incoming arguments.
4513 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4514 *DAG.getContext());
4515 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4516
4518 unsigned CurArgIdx = 0;
4519
4520 // Initially ArgRegsSaveSize is zero.
4521 // Then we increase this value each time we meet a byval parameter.
4522 // We also increase this value in the case of a varargs function.
4523 AFI->setArgRegsSaveSize(0);
4524
4525 // Calculate the amount of stack space that we need to allocate to store
4526 // byval and variadic arguments that are passed in registers.
4527 // We need to know this before we allocate the first byval or variadic
4528 // argument, as they will be allocated a stack slot below the CFA (Canonical
4529 // Frame Address, the stack pointer at entry to the function).
4530 unsigned ArgRegBegin = ARM::R4;
4531 for (const CCValAssign &VA : ArgLocs) {
4532 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4533 break;
4534
4535 unsigned Index = VA.getValNo();
4536 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4537 if (!Flags.isByVal())
4538 continue;
4539
4540 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4541 unsigned RBegin, REnd;
4542 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4543 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4544
4545 CCInfo.nextInRegsParam();
4546 }
4547 CCInfo.rewindByValRegsInfo();
4548
4549 int lastInsIndex = -1;
4550 if (isVarArg && MFI.hasVAStart()) {
4551 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4552 if (RegIdx != std::size(GPRArgRegs))
4553 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4554 }
4555
4556 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4557 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4558 auto PtrVT = getPointerTy(DAG.getDataLayout());
4559
4560 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4561 CCValAssign &VA = ArgLocs[i];
4562 if (Ins[VA.getValNo()].isOrigArg()) {
4563 std::advance(CurOrigArg,
4564 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4565 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4566 }
4567 // Arguments stored in registers.
4568 if (VA.isRegLoc()) {
4569 EVT RegVT = VA.getLocVT();
4570 SDValue ArgValue;
4571
4572 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4573 // f64 and vector types are split up into multiple registers or
4574 // combinations of registers and stack slots.
4575 SDValue ArgValue1 =
4576 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4577 VA = ArgLocs[++i]; // skip ahead to next loc
4578 SDValue ArgValue2;
4579 if (VA.isMemLoc()) {
4580 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4581 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4582 ArgValue2 = DAG.getLoad(
4583 MVT::f64, dl, Chain, FIN,
4585 } else {
4586 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4587 }
4588 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4589 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4590 ArgValue1, DAG.getIntPtrConstant(0, dl));
4591 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4592 ArgValue2, DAG.getIntPtrConstant(1, dl));
4593 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4594 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4595 } else {
4596 const TargetRegisterClass *RC;
4597
4598 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4599 RC = &ARM::HPRRegClass;
4600 else if (RegVT == MVT::f32)
4601 RC = &ARM::SPRRegClass;
4602 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4603 RegVT == MVT::v4bf16)
4604 RC = &ARM::DPRRegClass;
4605 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4606 RegVT == MVT::v8bf16)
4607 RC = &ARM::QPRRegClass;
4608 else if (RegVT == MVT::i32)
4609 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4610 : &ARM::GPRRegClass;
4611 else
4612 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4613
4614 // Transform the arguments in physical registers into virtual ones.
4615 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4616 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4617
4618 // If this value is passed in r0 and has the returned attribute (e.g.
4619 // C++ 'structors), record this fact for later use.
4620 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4621 AFI->setPreservesR0();
4622 }
4623 }
4624
4625 // If this is an 8 or 16-bit value, it is really passed promoted
4626 // to 32 bits. Insert an assert[sz]ext to capture this, then
4627 // truncate to the right size.
4628 switch (VA.getLocInfo()) {
4629 default: llvm_unreachable("Unknown loc info!");
4630 case CCValAssign::Full: break;
4631 case CCValAssign::BCvt:
4632 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4633 break;
4634 }
4635
4636 // f16 arguments have their size extended to 4 bytes and passed as if they
4637 // had been copied to the LSBs of a 32-bit register.
4638 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
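// For example (illustrative): a __fp16 argument arrives in the low 16 bits
// of r0 under the soft-float ABI, or in the bottom half of s0 under the
// hard-float ABI, and is then moved into an f16 value here.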
4639 if (VA.needsCustom() &&
4640 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4641 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4642
4643 // On CMSE Entry Functions, formal integer arguments whose bitwidth is
4644 // less than 32 bits must be sign- or zero-extended in the callee for
4645 // security reasons. Although the ABI mandates an extension done by the
4646 // caller, the latter cannot be trusted to follow the rules of the ABI.
4647 const ISD::InputArg &Arg = Ins[VA.getValNo()];
4648 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4649 RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
4650 ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);
4651
4652 InVals.push_back(ArgValue);
4653 } else { // VA.isRegLoc()
4654 // Only arguments passed on the stack should make it here.
4655 assert(VA.isMemLoc());
4656 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4657
4658 int index = VA.getValNo();
4659
4660 // Some Ins[] entries become multiple ArgLoc[] entries.
4661 // Process them only once.
4662 if (index != lastInsIndex)
4663 {
4664 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4665 // FIXME: For now, all byval parameter objects are marked mutable.
4666 // This can be changed with more analysis.
4667 // In the case of tail call optimization, mark all arguments mutable,
4668 // since they could be overwritten by the lowering of arguments in case of
4669 // a tail call.
4670 if (Flags.isByVal()) {
4671 assert(Ins[index].isOrigArg() &&
4672 "Byval arguments cannot be implicit");
4673 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4674
4675 int FrameIndex = StoreByValRegs(
4676 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4677 VA.getLocMemOffset(), Flags.getByValSize());
4678 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4679 CCInfo.nextInRegsParam();
4680 } else {
4681 unsigned FIOffset = VA.getLocMemOffset();
4682 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4683 FIOffset, true);
4684
4685 // Create load nodes to retrieve arguments from the stack.
4686 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4687 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4689 DAG.getMachineFunction(), FI)));
4690 }
4691 lastInsIndex = index;
4692 }
4693 }
4694 }
4695
4696 // varargs
4697 if (isVarArg && MFI.hasVAStart()) {
4698 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4699 TotalArgRegsSaveSize);
4700 if (AFI->isCmseNSEntryFunction()) {
4703 "secure entry function must not be variadic", dl.getDebugLoc());
4704 DAG.getContext()->diagnose(Diag);
4705 }
4706 }
4707
4708 unsigned StackArgSize = CCInfo.getStackSize();
4709 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4710 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4711 // The only way to guarantee a tail call is if the callee restores its
4712 // argument area, but it must also keep the stack aligned when doing so.
4713 const DataLayout &DL = DAG.getDataLayout();
4714 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4715
4716 AFI->setArgumentStackToRestore(StackArgSize);
4717 }
4718 AFI->setArgumentStackSize(StackArgSize);
4719
4720 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4723 "secure entry function requires arguments on stack", dl.getDebugLoc());
4724 DAG.getContext()->diagnose(Diag);
4725 }
4726
4727 return Chain;
4728}
4729
4730/// isFloatingPointZero - Return true if this is +0.0.
4732 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4733 return CFP->getValueAPF().isPosZero();
4734 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4735 // Maybe this has already been legalized into the constant pool?
4736 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4737 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4738 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4739 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4740 return CFP->getValueAPF().isPosZero();
4741 }
4742 } else if (Op->getOpcode() == ISD::BITCAST &&
4743 Op->getValueType(0) == MVT::f64) {
4744 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4745 // created by LowerConstantFP().
4746 SDValue BitcastOp = Op->getOperand(0);
4747 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4748 isNullConstant(BitcastOp->getOperand(0)))
4749 return true;
4750 }
4751 return false;
4752}
4753
4754 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition code for
4755/// the given operands.
4756SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4757 SDValue &ARMcc, SelectionDAG &DAG,
4758 const SDLoc &dl) const {
4759 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4760 unsigned C = RHSC->getZExtValue();
4761 if (!isLegalICmpImmediate((int32_t)C)) {
4762 // Constant does not fit, try adjusting it by one.
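// For example (illustrative): on Thumb1, "x <u 256" cannot be encoded as
// "cmp r0, #256" (the immediate field only reaches 255), but it is
// equivalent to "x <=u 255", which fits as "cmp r0, #255" with the LS
// condition.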
4763 switch (CC) {
4764 default: break;
4765 case ISD::SETLT:
4766 case ISD::SETGE:
4767 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4769 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4770 }
4771 break;
4772 case ISD::SETULT:
4773 case ISD::SETUGE:
4774 if (C != 0 && isLegalICmpImmediate(C-1)) {
4776 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4777 }
4778 break;
4779 case ISD::SETLE:
4780 case ISD::SETGT:
4781 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4783 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4784 }
4785 break;
4786 case ISD::SETULE:
4787 case ISD::SETUGT:
4788 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4790 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4791 }
4792 break;
4793 }
4794 }
4795 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4797 // In ARM and Thumb-2, the compare instructions can shift their second
4798 // operand.
4800 std::swap(LHS, RHS);
4801 }
4802
4803 // Thumb1 has very limited immediate modes, so turning an "and" into a
4804 // shift can save multiple instructions.
4805 //
4806 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4807 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4808 // own. If it's the operand to an unsigned comparison with an immediate,
4809 // we can eliminate one of the shifts: we transform
4810 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4811 //
4812 // We avoid transforming cases which aren't profitable due to encoding
4813 // details:
4814 //
4815 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4816 // would not; in that case, we're essentially trading one immediate load for
4817 // another.
4818 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4819 // 3. C2 is zero; we have other code for this special case.
4820 //
4821 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4822 // instruction, since the AND is always one instruction anyway, but we could
4823 // use narrow instructions in some cases.
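// Worked example (illustrative): "(x & 0x1ff) == 0x180" has C2 = 0x180,
// which does not fit a Thumb1 cmp immediate anyway, so it can be rewritten
// as "(x << 23) == (0x180 << 23)", replacing the AND and its constant
// materialization with a single left shift.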
4824 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4825 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4826 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4827 !isSignedIntSetCC(CC)) {
4828 unsigned Mask = LHS.getConstantOperandVal(1);
4829 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4830 uint64_t RHSV = RHSC->getZExtValue();
4831 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4832 unsigned ShiftBits = llvm::countl_zero(Mask);
4833 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4834 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4835 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4836 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4837 }
4838 }
4839 }
4840
4841 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4842 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4843 // way a cmp would.
4844 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4845 // some tweaks to the heuristics for the previous and->shift transform.
4846 // FIXME: Optimize cases where the LHS isn't a shift.
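// For example (illustrative): "(x << 3) >u 0x80000000" can become
//   lsls r1, r0, #4
//   bhi  <target>
// because the carry-out and zero flag of the one-larger shift match what a
// cmp against 0x80000000 would set for the HI condition.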
4847 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4848 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4849 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4850 LHS.getConstantOperandVal(1) < 31) {
4851 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4852 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4853 DAG.getVTList(MVT::i32, MVT::i32),
4854 LHS.getOperand(0),
4855 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4856 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4857 Shift.getValue(1), SDValue());
4858 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4859 return Chain.getValue(1);
4860 }
4861
4863
4864 // If the RHS is a constant zero then the V (overflow) flag will never be
4865 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4866 // simpler for other passes (like the peephole optimiser) to deal with.
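// For example (illustrative): for "x >= 0" the compare is against zero, so V
// is known clear and GE (N == V) can be relaxed to PL (N clear).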
4867 if (isNullConstant(RHS)) {
4868 switch (CondCode) {
4869 default: break;
4870 case ARMCC::GE:
4872 break;
4873 case ARMCC::LT:
4875 break;
4876 }
4877 }
4878
4879 ARMISD::NodeType CompareType;
4880 switch (CondCode) {
4881 default:
4882 CompareType = ARMISD::CMP;
4883 break;
4884 case ARMCC::EQ:
4885 case ARMCC::NE:
4886 // Uses only Z Flag
4887 CompareType = ARMISD::CMPZ;
4888 break;
4889 }
4890 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4891 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4892}
4893
4894 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4895SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4896 SelectionDAG &DAG, const SDLoc &dl,
4897 bool Signaling) const {
4898 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4899 SDValue Cmp;
4900 if (!isFloatingPointZero(RHS))
4901 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4902 dl, MVT::Glue, LHS, RHS);
4903 else
4904 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4905 dl, MVT::Glue, LHS);
4906 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4907}
4908
4909/// duplicateCmp - Glue values can have only one use, so this function
4910/// duplicates a comparison node.
4911SDValue
4912ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4913 unsigned Opc = Cmp.getOpcode();
4914 SDLoc DL(Cmp);
4915 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4916 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4917
4918 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4919 Cmp = Cmp.getOperand(0);
4920 Opc = Cmp.getOpcode();
4921 if (Opc == ARMISD::CMPFP)
4922 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4923 else {
4924 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4925 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4926 }
4927 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4928}
4929
4930// This function returns three things: the arithmetic computation itself
4931// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4932// comparison and the condition code define the case in which the arithmetic
4933// computation *does not* overflow.
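// For example (illustrative): for ISD::UADDO the sum is formed with ADDC and
// then compared against one operand; the unsigned addition did not wrap
// exactly when "sum >=u LHS", i.e. when the HS condition holds for
// "cmp sum, LHS".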
4934std::pair<SDValue, SDValue>
4935ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4936 SDValue &ARMcc) const {
4937 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4938
4939 SDValue Value, OverflowCmp;
4940 SDValue LHS = Op.getOperand(0);
4941 SDValue RHS = Op.getOperand(1);
4942 SDLoc dl(Op);
4943
4944 // FIXME: We are currently always generating CMPs because we don't support
4945 // generating CMN through the backend. This is not as good as the natural
4946 // CMP case because it causes a register dependency and cannot be folded
4947 // later.
4948
4949 switch (Op.getOpcode()) {
4950 default:
4951 llvm_unreachable("Unknown overflow instruction!");
4952 case ISD::SADDO:
4953 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4954 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4955 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4956 break;
4957 case ISD::UADDO:
4958 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4959 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4960 // We do not use it in the USUBO case as Value may not be used.
4961 Value = DAG.getNode(ARMISD::ADDC, dl,
4962 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4963 .getValue(0);
4964 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4965 break;
4966 case ISD::SSUBO:
4967 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4968 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4969 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4970 break;
4971 case ISD::USUBO:
4972 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4973 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4974 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4975 break;
4976 case ISD::UMULO:
4977 // We generate a UMUL_LOHI and then check if the high word is 0.
4978 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4979 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4980 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4981 LHS, RHS);
4982 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4983 DAG.getConstant(0, dl, MVT::i32));
4984 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4985 break;
4986 case ISD::SMULO:
4987 // We generate a SMUL_LOHI and then check if all the bits of the high word
4988 // are the same as the sign bit of the low word.
4989 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4990 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4991 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4992 LHS, RHS);
4993 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4994 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4995 Value.getValue(0),
4996 DAG.getConstant(31, dl, MVT::i32)));
4997 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4998 break;
4999 } // switch (...)
5000
5001 return std::make_pair(Value, OverflowCmp);
5002}
5003
5004SDValue
5005ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
5006 // Let legalize expand this if it isn't a legal type yet.
5007 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5008 return SDValue();
5009
5010 SDValue Value, OverflowCmp;
5011 SDValue ARMcc;
5012 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
5013 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5014 SDLoc dl(Op);
5015 // We use 0 and 1 as false and true values.
5016 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
5017 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
5018 EVT VT = Op.getValueType();
5019
5020 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
5021 ARMcc, CCR, OverflowCmp);
5022
5023 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
5024 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5025}
5026
5028 SelectionDAG &DAG) {
5029 SDLoc DL(BoolCarry);
5030 EVT CarryVT = BoolCarry.getValueType();
5031
5032 // This converts the boolean value carry into the carry flag by doing
5033 // ARMISD::SUBC Carry, 1
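// For illustration: 1 - 1 = 0 with no borrow (ARM carry set), while 0 - 1
// borrows (carry clear), so the boolean ends up mirrored in the carry flag.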
5034 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
5035 DAG.getVTList(CarryVT, MVT::i32),
5036 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5037 return Carry.getValue(1);
5038}
5039
5041 SelectionDAG &DAG) {
5042 SDLoc DL(Flags);
5043
5044 // Now convert the carry flag into a boolean carry. We do this
5045 // using ARMISD::ADDE 0, 0, Carry
5046 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5047 DAG.getConstant(0, DL, MVT::i32),
5048 DAG.getConstant(0, DL, MVT::i32), Flags);
5049}
5050
5051SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5052 SelectionDAG &DAG) const {
5053 // Let legalize expand this if it isn't a legal type yet.
5054 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5055 return SDValue();
5056
5057 SDValue LHS = Op.getOperand(0);
5058 SDValue RHS = Op.getOperand(1);
5059 SDLoc dl(Op);
5060
5061 EVT VT = Op.getValueType();
5062 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5063 SDValue Value;
5064 SDValue Overflow;
5065 switch (Op.getOpcode()) {
5066 default:
5067 llvm_unreachable("Unknown overflow instruction!");
5068 case ISD::UADDO:
5069 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5070 // Convert the carry flag into a boolean value.
5071 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5072 break;
5073 case ISD::USUBO: {
5074 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5075 // Convert the carry flag into a boolean value.
5076 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5077 // ARMISD::SUBC returns 0 when we have to borrow, so to turn it into an
5078 // overflow value we compute 1 - C.
5079 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5080 DAG.getConstant(1, dl, MVT::i32), Overflow);
5081 break;
5082 }
5083 }
5084
5085 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5086}
5087
5089 const ARMSubtarget *Subtarget) {
5090 EVT VT = Op.getValueType();
5091 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5092 return SDValue();
5093 if (!VT.isSimple())
5094 return SDValue();
5095
5096 unsigned NewOpcode;
5097 switch (VT.getSimpleVT().SimpleTy) {
5098 default:
5099 return SDValue();
5100 case MVT::i8:
5101 switch (Op->getOpcode()) {
5102 case ISD::UADDSAT:
5103 NewOpcode = ARMISD::UQADD8b;
5104 break;
5105 case ISD::SADDSAT:
5106 NewOpcode = ARMISD::QADD8b;
5107 break;
5108 case ISD::USUBSAT:
5109 NewOpcode = ARMISD::UQSUB8b;
5110 break;
5111 case ISD::SSUBSAT:
5112 NewOpcode = ARMISD::QSUB8b;
5113 break;
5114 }
5115 break;
5116 case MVT::i16:
5117 switch (Op->getOpcode()) {
5118 case ISD::UADDSAT:
5119 NewOpcode = ARMISD::UQADD16b;
5120 break;
5121 case ISD::SADDSAT:
5122 NewOpcode = ARMISD::QADD16b;
5123 break;
5124 case ISD::USUBSAT:
5125 NewOpcode = ARMISD::UQSUB16b;
5126 break;
5127 case ISD::SSUBSAT:
5128 NewOpcode = ARMISD::QSUB16b;
5129 break;
5130 }
5131 break;
5132 }
5133
5134 SDLoc dl(Op);
5135 SDValue Add =
5136 DAG.getNode(NewOpcode, dl, MVT::i32,
5137 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5138 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5139 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5140}
5141
5142SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5143 SDValue Cond = Op.getOperand(0);
5144 SDValue SelectTrue = Op.getOperand(1);
5145 SDValue SelectFalse = Op.getOperand(2);
5146 SDLoc dl(Op);
5147 unsigned Opc = Cond.getOpcode();
5148
5149 if (Cond.getResNo() == 1 &&
5150 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5151 Opc == ISD::USUBO)) {
5152 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5153 return SDValue();
5154
5155 SDValue Value, OverflowCmp;
5156 SDValue ARMcc;
5157 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5158 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5159 EVT VT = Op.getValueType();
5160
5161 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5162 OverflowCmp, DAG);
5163 }
5164
5165 // Convert:
5166 //
5167 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5168 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5169 //
5170 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5171 const ConstantSDNode *CMOVTrue =
5172 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5173 const ConstantSDNode *CMOVFalse =
5174 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5175
5176 if (CMOVTrue && CMOVFalse) {
5177 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5178 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5179
5180 SDValue True;
5181 SDValue False;
5182 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5183 True = SelectTrue;
5184 False = SelectFalse;
5185 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5186 True = SelectFalse;
5187 False = SelectTrue;
5188 }
5189
5190 if (True.getNode() && False.getNode()) {
5191 EVT VT = Op.getValueType();
5192 SDValue ARMcc = Cond.getOperand(2);
5193 SDValue CCR = Cond.getOperand(3);
5194 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5195 assert(True.getValueType() == VT);
5196 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5197 }
5198 }
5199 }
5200
5201 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5202 // undefined bits before doing a full-word comparison with zero.
5203 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5204 DAG.getConstant(1, dl, Cond.getValueType()));
5205
5206 return DAG.getSelectCC(dl, Cond,
5207 DAG.getConstant(0, dl, Cond.getValueType()),
5208 SelectTrue, SelectFalse, ISD::SETNE);
5209}
5210
5212 bool &swpCmpOps, bool &swpVselOps) {
5213 // Start by selecting the GE condition code for opcodes that return true for
5214 // 'equality'
5215 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5216 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5217 CondCode = ARMCC::GE;
5218
5219 // and GT for opcodes that return false for 'equality'.
5220 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5221 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5222 CondCode = ARMCC::GT;
5223
5224 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5225 // to swap the compare operands.
5226 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5227 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5228 swpCmpOps = true;
5229
5230 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5231 // If we have an unordered opcode, we need to swap the operands to the VSEL
5232 // instruction (effectively negating the condition).
5233 //
5234 // This also has the effect of swapping which one of 'less' or 'greater'
5235 // returns true, so we also swap the compare operands. It also switches
5236 // whether we return true for 'equality', so we compensate by picking the
5237 // opposite condition code to our original choice.
5238 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5239 CC == ISD::SETUGT) {
5240 swpCmpOps = !swpCmpOps;
5241 swpVselOps = !swpVselOps;
5242 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5243 }
5244
5245 // 'ordered' is 'anything but unordered', so use the VS condition code and
5246 // swap the VSEL operands.
5247 if (CC == ISD::SETO) {
5248 CondCode = ARMCC::VS;
5249 swpVselOps = true;
5250 }
5251
5252 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5253 // code and swap the VSEL operands. Also do this if we don't care about the
5254 // unordered case.
5255 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5256 CondCode = ARMCC::EQ;
5257 swpVselOps = true;
5258 }
5259}
5260
5261SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5262 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5263 SDValue Cmp, SelectionDAG &DAG) const {
5264 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5266 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5268 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5269
5270 SDValue TrueLow = TrueVal.getValue(0);
5271 SDValue TrueHigh = TrueVal.getValue(1);
5272 SDValue FalseLow = FalseVal.getValue(0);
5273 SDValue FalseHigh = FalseVal.getValue(1);
5274
5275 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5276 ARMcc, CCR, Cmp);
5277 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5278 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5279
5280 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5281 } else {
5282 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5283 Cmp);
5284 }
5285}
5286
5288 return CC == ISD::SETGT || CC == ISD::SETGE;
5289}
5290
5292 return CC == ISD::SETLT || CC == ISD::SETLE;
5293}
5294
5295// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5296// All of these conditions (and their <= and >= counterparts) will do:
5297// x < k ? k : x
5298// x > k ? x : k
5299// k < x ? x : k
5300// k > x ? k : x
5301static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5302 const SDValue TrueVal, const SDValue FalseVal,
5303 const ISD::CondCode CC, const SDValue K) {
5304 return (isGTorGE(CC) &&
5305 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5306 (isLTorLE(CC) &&
5307 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5308}
5309
5310// Check if two chained conditionals could be converted into SSAT or USAT.
5311//
5312// SSAT can replace a set of two conditional selectors that bound a number to an
5313 // interval of type [~k, k] when k + 1 is a power of 2. Here are some examples:
5314//
5315// x < -k ? -k : (x > k ? k : x)
5316// x < -k ? -k : (x < k ? x : k)
5317// x > -k ? (x > k ? k : x) : -k
5318// x < k ? (x < -k ? -k : x) : k
5319// etc.
5320//
5321// LLVM canonicalizes these to either a min(max()) or a max(min())
5322// pattern. This function tries to match one of these and will return a SSAT
5323// node if successful.
5324//
5325 // USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
5326// is a power of 2.
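// Illustrative example (a sketch, not from the original comment): clamping to
// the signed 8-bit range,
//   y = x < -128 ? -128 : (x > 127 ? 127 : x);
// can be emitted as a single "ssat r0, #8, r1", and clamping to [0, 255] as
// "usat r0, #8, r1".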
5328 EVT VT = Op.getValueType();
5329 SDValue V1 = Op.getOperand(0);
5330 SDValue K1 = Op.getOperand(1);
5331 SDValue TrueVal1 = Op.getOperand(2);
5332 SDValue FalseVal1 = Op.getOperand(3);
5333 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5334
5335 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5336 if (Op2.getOpcode() != ISD::SELECT_CC)
5337 return SDValue();
5338
5339 SDValue V2 = Op2.getOperand(0);
5340 SDValue K2 = Op2.getOperand(1);
5341 SDValue TrueVal2 = Op2.getOperand(2);
5342 SDValue FalseVal2 = Op2.getOperand(3);
5343 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5344
5345 SDValue V1Tmp = V1;
5346 SDValue V2Tmp = V2;
5347
5348 // Check that the registers and the constants match a max(min()) or min(max())
5349 // pattern
5350 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5351 K2 != FalseVal2 ||
5352 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5353 return SDValue();
5354
5355 // Check that the constant in the lower-bound check is
5356 // the opposite of the constant in the upper-bound check
5357 // in 1's complement.
5358 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5359 return SDValue();
5360
5361 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5362 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5363 int64_t PosVal = std::max(Val1, Val2);
5364 int64_t NegVal = std::min(Val1, Val2);
5365
5366 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5367 !isPowerOf2_64(PosVal + 1))
5368 return SDValue();
5369
5370 // Handle the difference between USAT (unsigned) and SSAT (signed)
5371 // saturation
5372 // At this point, PosVal is guaranteed to be positive
5373 uint64_t K = PosVal;
5374 SDLoc dl(Op);
5375 if (Val1 == ~Val2)
5376 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5377 DAG.getConstant(llvm::countr_one(K), dl, VT));
5378 if (NegVal == 0)
5379 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5380 DAG.getConstant(llvm::countr_one(K), dl, VT));
5381
5382 return SDValue();
5383}
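// Illustrative examples (added annotation, not in the original source):
//   y = x < -128 ? -128 : (x > 127 ? 127 : x);   // bounds x to [-128, 127]
// is an interval [~k, k] with k = 127 and k + 1 = 128 a power of 2, so the
// matcher above can select a single "ssat rD, #8, rN", while
//   y = x < 0 ? 0 : (x > 255 ? 255 : x);         // bounds x to [0, 255]
// maps to "usat rD, #8, rN" (register names are illustrative).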
5384
5385// Check if a condition of the type x < k ? k : x can be converted into a
5386// bit operation instead of conditional moves.
5387// Currently this is allowed given:
5388// - The conditions and values match up
5389// - k is 0 or -1 (all ones)
5390 // This function will not check the last condition; that's up to the caller.
5391 // It returns true if the transformation can be made, and in that case
5392 // returns x in V, and k in SatK.
5393static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5394 SDValue &SatK)
5395{
5396 SDValue LHS = Op.getOperand(0);
5397 SDValue RHS = Op.getOperand(1);
5398 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5399 SDValue TrueVal = Op.getOperand(2);
5400 SDValue FalseVal = Op.getOperand(3);
5401
5402 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5403 ? &RHS
5404 : nullptr;
5405
5406 // No constant operand in the comparison, early out
5407 if (!K)
5408 return false;
5409
5410 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5411 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5412 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5413
5414 // If the constant in the comparison does not match the constant in the
5415 // select, or the variables do not match, early out
5416 if (*K != KTmp || V != VTmp)
5417 return false;
5418
5419 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5420 SatK = *K;
5421 return true;
5422 }
5423
5424 return false;
5425}
5426
5427bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5428 if (VT == MVT::f32)
5429 return !Subtarget->hasVFP2Base();
5430 if (VT == MVT::f64)
5431 return !Subtarget->hasFP64();
5432 if (VT == MVT::f16)
5433 return !Subtarget->hasFullFP16();
5434 return false;
5435}
5436
5437SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5438 EVT VT = Op.getValueType();
5439 SDLoc dl(Op);
5440
5441 // Try to convert two saturating conditional selects into a single SSAT
5442 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5443 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5444 return SatValue;
5445
5446 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5447 // into more efficient bit operations, which is possible when k is 0 or -1
5448 // On ARM and Thumb-2, which have a flexible second operand, this results in a
5449 // single instruction. On Thumb-1 the shift and the bit operation will be two
5450 // instructions.
5451 // Only allow this transformation on full-width (32-bit) operations
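// Illustrative examples (added annotation, not in the original source):
//   max(x, 0),  i.e. x < 0 ? 0 : x,    becomes  x & ~(x >> 31)  (BIC with ASR #31)
//   max(x, -1), i.e. x < -1 ? -1 : x,  becomes  x | (x >> 31)   (ORR with ASR #31)
// where ">>" is an arithmetic shift right.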
5452 SDValue LowerSatConstant;
5453 SDValue SatValue;
5454 if (VT == MVT::i32 &&
5455 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5456 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5457 DAG.getConstant(31, dl, VT));
5458 if (isNullConstant(LowerSatConstant)) {
5459 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5460 DAG.getAllOnesConstant(dl, VT));
5461 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5462 } else if (isAllOnesConstant(LowerSatConstant))
5463 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5464 }
5465
5466 SDValue LHS = Op.getOperand(0);
5467 SDValue RHS = Op.getOperand(1);
5468 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5469 SDValue TrueVal = Op.getOperand(2);
5470 SDValue FalseVal = Op.getOperand(3);
5471 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5472 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5473
5474 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5475 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5476 unsigned TVal = CTVal->getZExtValue();
5477 unsigned FVal = CFVal->getZExtValue();
5478 unsigned Opcode = 0;
5479
5480 if (TVal == ~FVal) {
5481 Opcode = ARMISD::CSINV;
5482 } else if (TVal == ~FVal + 1) {
5483 Opcode = ARMISD::CSNEG;
5484 } else if (TVal + 1 == FVal) {
5485 Opcode = ARMISD::CSINC;
5486 } else if (TVal == FVal + 1) {
5487 Opcode = ARMISD::CSINC;
5488 std::swap(TrueVal, FalseVal);
5489 std::swap(TVal, FVal);
5490 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5491 }
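// Illustrative examples (added annotation, not in the original source):
//   select(c, -1, 0) has TVal == ~FVal      -> CSINV
//   select(c,  7, -7) has TVal == -FVal     -> CSNEG
//   select(c,  4,  5) has TVal + 1 == FVal  -> CSINC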
5492
5493 if (Opcode) {
5494 // If one of the constants is cheaper than another, materialise the
5495 // cheaper one and let the csel generate the other.
5496 if (Opcode != ARMISD::CSINC &&
5497 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5498 std::swap(TrueVal, FalseVal);
5499 std::swap(TVal, FVal);
5500 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5501 }
5502
5503 // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5504 // condition to get there. CSINC is not invertible like the other two
5505 // (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
5506 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5507 std::swap(TrueVal, FalseVal);
5508 std::swap(TVal, FVal);
5509 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5510 }
5511
5512 // Drops F's value because we can get it by inverting/negating TVal.
5513 FalseVal = TrueVal;
5514
5515 SDValue ARMcc;
5516 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5517 EVT VT = TrueVal.getValueType();
5518 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5519 }
5520 }
5521
5522 if (isUnsupportedFloatingType(LHS.getValueType())) {
5523 DAG.getTargetLoweringInfo().softenSetCCOperands(
5524 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5525
5526 // If softenSetCCOperands only returned one value, we should compare it to
5527 // zero.
5528 if (!RHS.getNode()) {
5529 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5530 CC = ISD::SETNE;
5531 }
5532 }
5533
5534 if (LHS.getValueType() == MVT::i32) {
5535 // Try to generate VSEL on ARMv8.
5536 // The VSEL instruction can't use all the usual ARM condition
5537 // codes: it only has two bits to select the condition code, so it's
5538 // constrained to use only GE, GT, VS and EQ.
5539 //
5540 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5541 // swap the operands of the previous compare instruction (effectively
5542 // inverting the compare condition, swapping 'less' and 'greater') and
5543 // sometimes need to swap the operands to the VSEL (which inverts the
5544 // condition in the sense of firing whenever the previous condition didn't)
5545 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5546 TrueVal.getValueType() == MVT::f32 ||
5547 TrueVal.getValueType() == MVT::f64)) {
5548 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5549 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5550 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5551 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5552 std::swap(TrueVal, FalseVal);
5553 }
5554 }
5555
5556 SDValue ARMcc;
5557 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5558 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5559 // Choose GE over PL, which vsel does not support
5560 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5561 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5562 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5563 }
5564
5565 ARMCC::CondCodes CondCode, CondCode2;
5566 FPCCToARMCC(CC, CondCode, CondCode2);
5567
5568 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5569 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5570 // must use VSEL (limited condition codes), due to not having conditional f16
5571 // moves.
5572 if (Subtarget->hasFPARMv8Base() &&
5573 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5574 (TrueVal.getValueType() == MVT::f16 ||
5575 TrueVal.getValueType() == MVT::f32 ||
5576 TrueVal.getValueType() == MVT::f64)) {
5577 bool swpCmpOps = false;
5578 bool swpVselOps = false;
5579 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5580
5581 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5582 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5583 if (swpCmpOps)
5584 std::swap(LHS, RHS);
5585 if (swpVselOps)
5586 std::swap(TrueVal, FalseVal);
5587 }
5588 }
5589
5590 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5591 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5592 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5593 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5594 if (CondCode2 != ARMCC::AL) {
5595 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5596 // FIXME: Needs another CMP because flag can have but one use.
5597 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5598 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5599 }
5600 return Result;
5601}
5602
5603/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5604/// to morph to an integer compare sequence.
5605static bool canChangeToInt(SDValue Op, bool &SeenZero,
5606 const ARMSubtarget *Subtarget) {
5607 SDNode *N = Op.getNode();
5608 if (!N->hasOneUse())
5609 // Otherwise it requires moving the value from fp to integer registers.
5610 return false;
5611 if (!N->getNumValues())
5612 return false;
5613 EVT VT = Op.getValueType();
5614 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5615 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5616 // vmrs are very slow, e.g. cortex-a8.
5617 return false;
5618
5619 if (isFloatingPointZero(Op)) {
5620 SeenZero = true;
5621 return true;
5622 }
5623 return ISD::isNormalLoad(N);
5624}
5625
5626static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5627 if (isFloatingPointZero(Op))
5628 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5629
5630 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5631 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5632 Ld->getPointerInfo(), Ld->getAlign(),
5633 Ld->getMemOperand()->getFlags());
5634
5635 llvm_unreachable("Unknown VFP cmp argument!");
5636}
5637
5638static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5639 SDValue &RetVal1, SDValue &RetVal2) {
5640 SDLoc dl(Op);
5641
5642 if (isFloatingPointZero(Op)) {
5643 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5644 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5645 return;
5646 }
5647
5648 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5649 SDValue Ptr = Ld->getBasePtr();
5650 RetVal1 =
5651 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5652 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5653
5654 EVT PtrType = Ptr.getValueType();
5655 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5656 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5657 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5658 Ld->getPointerInfo().getWithOffset(4),
5659 commonAlignment(Ld->getAlign(), 4),
5660 Ld->getMemOperand()->getFlags());
5661 return;
5662 }
5663
5664 llvm_unreachable("Unknown VFP cmp argument!");
5665}
5666
5667/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5668/// f32 and even f64 comparisons to integer ones.
5669SDValue
5670ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5671 SDValue Chain = Op.getOperand(0);
5672 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5673 SDValue LHS = Op.getOperand(2);
5674 SDValue RHS = Op.getOperand(3);
5675 SDValue Dest = Op.getOperand(4);
5676 SDLoc dl(Op);
5677
5678 bool LHSSeenZero = false;
5679 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5680 bool RHSSeenZero = false;
5681 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5682 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5683 // If unsafe fp math optimization is enabled and there are no other uses of
5684 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5685 // to an integer comparison.
5686 if (CC == ISD::SETOEQ)
5687 CC = ISD::SETEQ;
5688 else if (CC == ISD::SETUNE)
5689 CC = ISD::SETNE;
5690
5691 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5692 SDValue ARMcc;
5693 if (LHS.getValueType() == MVT::f32) {
5694 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5695 bitcastf32Toi32(LHS, DAG), Mask);
5696 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5697 bitcastf32Toi32(RHS, DAG), Mask);
5698 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5699 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5700 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5701 Chain, Dest, ARMcc, CCR, Cmp);
5702 }
5703
5704 SDValue LHS1, LHS2;
5705 SDValue RHS1, RHS2;
5706 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5707 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5708 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5709 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5710 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5711 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5712 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5713 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5714 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5715 }
5716
5717 return SDValue();
5718}
5719
5720SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5721 SDValue Chain = Op.getOperand(0);
5722 SDValue Cond = Op.getOperand(1);
5723 SDValue Dest = Op.getOperand(2);
5724 SDLoc dl(Op);
5725
5726 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5727 // instruction.
5728 unsigned Opc = Cond.getOpcode();
5729 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5730 !Subtarget->isThumb1Only();
5731 if (Cond.getResNo() == 1 &&
5732 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5733 Opc == ISD::USUBO || OptimizeMul)) {
5734 // Only lower legal XALUO ops.
5735 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5736 return SDValue();
5737
5738 // The actual operation with overflow check.
5739 SDValue Value, OverflowCmp;
5740 SDValue ARMcc;
5741 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5742
5743 // Reverse the condition code.
5744 ARMCC::CondCodes CondCode =
5745 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5746 CondCode = ARMCC::getOppositeCondition(CondCode);
5747 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5748 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5749
5750 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5751 OverflowCmp);
5752 }
5753
5754 return SDValue();
5755}
5756
5757SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5758 SDValue Chain = Op.getOperand(0);
5759 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5760 SDValue LHS = Op.getOperand(2);
5761 SDValue RHS = Op.getOperand(3);
5762 SDValue Dest = Op.getOperand(4);
5763 SDLoc dl(Op);
5764
5765 if (isUnsupportedFloatingType(LHS.getValueType())) {
5766 DAG.getTargetLoweringInfo().softenSetCCOperands(
5767 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5768
5769 // If softenSetCCOperands only returned one value, we should compare it to
5770 // zero.
5771 if (!RHS.getNode()) {
5772 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5773 CC = ISD::SETNE;
5774 }
5775 }
5776
5777 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5778 // instruction.
5779 unsigned Opc = LHS.getOpcode();
5780 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5781 !Subtarget->isThumb1Only();
5782 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5783 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5784 Opc == ISD::USUBO || OptimizeMul) &&
5785 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5786 // Only lower legal XALUO ops.
5787 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5788 return SDValue();
5789
5790 // The actual operation with overflow check.
5791 SDValue Value, OverflowCmp;
5792 SDValue ARMcc;
5793 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5794
5795 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5796 // Reverse the condition code.
5797 ARMCC::CondCodes CondCode =
5798 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5799 CondCode = ARMCC::getOppositeCondition(CondCode);
5800 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5801 }
5802 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5803
5804 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5805 OverflowCmp);
5806 }
5807
5808 if (LHS.getValueType() == MVT::i32) {
5809 SDValue ARMcc;
5810 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5811 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5812 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5813 Chain, Dest, ARMcc, CCR, Cmp);
5814 }
5815
5816 if (getTargetMachine().Options.UnsafeFPMath &&
5817 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5818 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5819 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5820 return Result;
5821 }
5822
5823 ARMCC::CondCodes CondCode, CondCode2;
5824 FPCCToARMCC(CC, CondCode, CondCode2);
5825
5826 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5827 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5828 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5829 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5830 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5831 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5832 if (CondCode2 != ARMCC::AL) {
5833 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5834 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5835 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5836 }
5837 return Res;
5838}
5839
5840SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5841 SDValue Chain = Op.getOperand(0);
5842 SDValue Table = Op.getOperand(1);
5843 SDValue Index = Op.getOperand(2);
5844 SDLoc dl(Op);
5845
5846 EVT PTy = getPointerTy(DAG.getDataLayout());
5847 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5848 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5849 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5850 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5851 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5852 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5853 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table
5854 // which does another jump to the destination. This also makes it easier
5855 // to translate it to TBB / TBH later (Thumb2 only).
5856 // FIXME: This might not work if the function is extremely large.
5857 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5858 Addr, Op.getOperand(2), JTI);
5859 }
5860 if (isPositionIndependent() || Subtarget->isROPI()) {
5861 Addr =
5862 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5863 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5864 Chain = Addr.getValue(1);
5865 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5866 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5867 } else {
5868 Addr =
5869 DAG.getLoad(PTy, dl, Chain, Addr,
5870 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5871 Chain = Addr.getValue(1);
5872 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5873 }
5874}
5875
5876static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5877 EVT VT = Op.getValueType();
5878 SDLoc dl(Op);
5879
5880 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5881 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5882 return Op;
5883 return DAG.UnrollVectorOp(Op.getNode());
5884 }
5885
5886 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5887
5888 EVT NewTy;
5889 const EVT OpTy = Op.getOperand(0).getValueType();
5890 if (OpTy == MVT::v4f32)
5891 NewTy = MVT::v4i32;
5892 else if (OpTy == MVT::v4f16 && HasFullFP16)
5893 NewTy = MVT::v4i16;
5894 else if (OpTy == MVT::v8f16 && HasFullFP16)
5895 NewTy = MVT::v8i16;
5896 else
5897 llvm_unreachable("Invalid type for custom lowering!");
5898
5899 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5900 return DAG.UnrollVectorOp(Op.getNode());
5901
5902 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5903 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5904}
5905
5906SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5907 EVT VT = Op.getValueType();
5908 if (VT.isVector())
5909 return LowerVectorFP_TO_INT(Op, DAG);
5910
5911 bool IsStrict = Op->isStrictFPOpcode();
5912 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5913
5914 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5915 RTLIB::Libcall LC;
5916 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5917 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5918 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5919 Op.getValueType());
5920 else
5921 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5922 Op.getValueType());
5923 SDLoc Loc(Op);
5924 MakeLibCallOptions CallOptions;
5925 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5926 SDValue Result;
5927 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5928 CallOptions, Loc, Chain);
5929 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5930 }
5931
5932 // FIXME: Remove this when we have strict fp instruction selection patterns
5933 if (IsStrict) {
5934 SDLoc Loc(Op);
5935 SDValue Result =
5936 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5937 : ISD::FP_TO_UINT,
5938 Loc, Op.getValueType(), SrcVal);
5939 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5940 }
5941
5942 return Op;
5943}
5944
5945static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5946 const ARMSubtarget *Subtarget) {
5947 EVT VT = Op.getValueType();
5948 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5949 EVT FromVT = Op.getOperand(0).getValueType();
5950
5951 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5952 return Op;
5953 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5954 Subtarget->hasFP64())
5955 return Op;
5956 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5957 Subtarget->hasFullFP16())
5958 return Op;
5959 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5960 Subtarget->hasMVEFloatOps())
5961 return Op;
5962 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5963 Subtarget->hasMVEFloatOps())
5964 return Op;
5965
5966 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5967 return SDValue();
5968
5969 SDLoc DL(Op);
5970 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5971 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
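// Illustrative note (added annotation, not in the original source): for a
// signed i16 saturation BW is 15, so the clamp below is
//   smin(cvt, (1 << 15) - 1 = 32767) followed by smax(..., -(1 << 15) = -32768);
// for an unsigned i16 saturation BW is 16 and only the umin bound 65535 applies.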
5972 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5973 DAG.getValueType(VT.getScalarType()));
5974 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5975 DAG.getConstant((1 << BW) - 1, DL, VT));
5976 if (IsSigned)
5977 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5978 DAG.getConstant(-(1 << BW), DL, VT));
5979 return Max;
5980}
5981
5982static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5983 EVT VT = Op.getValueType();
5984 SDLoc dl(Op);
5985
5986 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5987 if (VT.getVectorElementType() == MVT::f32)
5988 return Op;
5989 return DAG.UnrollVectorOp(Op.getNode());
5990 }
5991
5992 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5993 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5994 "Invalid type for custom lowering!");
5995
5996 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5997
5998 EVT DestVecType;
5999 if (VT == MVT::v4f32)
6000 DestVecType = MVT::v4i32;
6001 else if (VT == MVT::v4f16 && HasFullFP16)
6002 DestVecType = MVT::v4i16;
6003 else if (VT == MVT::v8f16 && HasFullFP16)
6004 DestVecType = MVT::v8i16;
6005 else
6006 return DAG.UnrollVectorOp(Op.getNode());
6007
6008 unsigned CastOpc;
6009 unsigned Opc;
6010 switch (Op.getOpcode()) {
6011 default: llvm_unreachable("Invalid opcode!");
6012 case ISD::SINT_TO_FP:
6013 CastOpc = ISD::SIGN_EXTEND;
6014 Opc = ISD::SINT_TO_FP;
6015 break;
6016 case ISD::UINT_TO_FP:
6017 CastOpc = ISD::ZERO_EXTEND;
6018 Opc = ISD::UINT_TO_FP;
6019 break;
6020 }
6021
6022 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
6023 return DAG.getNode(Opc, dl, VT, Op);
6024}
6025
6026SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
6027 EVT VT = Op.getValueType();
6028 if (VT.isVector())
6029 return LowerVectorINT_TO_FP(Op, DAG);
6030 if (isUnsupportedFloatingType(VT)) {
6031 RTLIB::Libcall LC;
6032 if (Op.getOpcode() == ISD::SINT_TO_FP)
6033 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
6034 Op.getValueType());
6035 else
6036 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6037 Op.getValueType());
6038 MakeLibCallOptions CallOptions;
6039 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6040 CallOptions, SDLoc(Op)).first;
6041 }
6042
6043 return Op;
6044}
6045
6046SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6047 // Implement fcopysign with a fabs and a conditional fneg.
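// Illustrative note (added annotation, not in the original source): on the
// scalar integer path below this amounts to, for f32,
//   result = (bits(Tmp0) & 0x7fffffff) | (bits(Tmp1) & 0x80000000)
// i.e. keep the magnitude of operand 0 and take the sign bit of operand 1.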
6048 SDValue Tmp0 = Op.getOperand(0);
6049 SDValue Tmp1 = Op.getOperand(1);
6050 SDLoc dl(Op);
6051 EVT VT = Op.getValueType();
6052 EVT SrcVT = Tmp1.getValueType();
6053 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6054 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6055 bool UseNEON = !InGPR && Subtarget->hasNEON();
6056
6057 if (UseNEON) {
6058 // Use VBSL to copy the sign bit.
6059 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6060 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6061 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6062 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6063 if (VT == MVT::f64)
6064 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6065 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6066 DAG.getConstant(32, dl, MVT::i32));
6067 else /*if (VT == MVT::f32)*/
6068 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6069 if (SrcVT == MVT::f32) {
6070 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6071 if (VT == MVT::f64)
6072 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6073 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6074 DAG.getConstant(32, dl, MVT::i32));
6075 } else if (VT == MVT::f32)
6076 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6077 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6078 DAG.getConstant(32, dl, MVT::i32));
6079 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6080 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6081
6082 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6083 dl, MVT::i32);
6084 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6085 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6086 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6087
6088 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6089 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6090 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6091 if (VT == MVT::f32) {
6092 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6093 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6094 DAG.getConstant(0, dl, MVT::i32));
6095 } else {
6096 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6097 }
6098
6099 return Res;
6100 }
6101
6102 // Bitcast operand 1 to i32.
6103 if (SrcVT == MVT::f64)
6104 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6105 Tmp1).getValue(1);
6106 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6107
6108 // Or in the signbit with integer operations.
6109 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6110 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6111 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6112 if (VT == MVT::f32) {
6113 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6114 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6115 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6116 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6117 }
6118
6119 // f64: Or the high part with signbit and then combine two parts.
6120 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6121 Tmp0);
6122 SDValue Lo = Tmp0.getValue(0);
6123 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6124 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6125 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6126}
6127
6128SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6129 MachineFunction &MF = DAG.getMachineFunction();
6130 MachineFrameInfo &MFI = MF.getFrameInfo();
6131 MFI.setReturnAddressIsTaken(true);
6132
6133 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6134 return SDValue();
6135
6136 EVT VT = Op.getValueType();
6137 SDLoc dl(Op);
6138 unsigned Depth = Op.getConstantOperandVal(0);
6139 if (Depth) {
6140 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6141 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6142 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6143 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6144 MachinePointerInfo());
6145 }
6146
6147 // Return LR, which contains the return address. Mark it an implicit live-in.
6148 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6149 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6150}
6151
6152SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6153 const ARMBaseRegisterInfo &ARI =
6154 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6155 MachineFunction &MF = DAG.getMachineFunction();
6156 MachineFrameInfo &MFI = MF.getFrameInfo();
6157 MFI.setFrameAddressIsTaken(true);
6158
6159 EVT VT = Op.getValueType();
6160 SDLoc dl(Op); // FIXME probably not meaningful
6161 unsigned Depth = Op.getConstantOperandVal(0);
6162 Register FrameReg = ARI.getFrameRegister(MF);
6163 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6164 while (Depth--)
6165 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6166 MachinePointerInfo());
6167 return FrameAddr;
6168}
6169
6170// FIXME? Maybe this could be a TableGen attribute on some registers and
6171// this table could be generated automatically from RegInfo.
6172Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6173 const MachineFunction &MF) const {
6174 Register Reg = StringSwitch<unsigned>(RegName)
6175 .Case("sp", ARM::SP)
6176 .Default(0);
6177 if (Reg)
6178 return Reg;
6179 report_fatal_error(Twine("Invalid register name \""
6180 + StringRef(RegName) + "\"."));
6181}
6182
6183// The result is a 64-bit value, so split it into two 32-bit values and return
6184// them as a pair of values.
6185static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6186 SelectionDAG &DAG) {
6187 SDLoc DL(N);
6188
6189 // This function is only supposed to be called for i64 type destination.
6190 assert(N->getValueType(0) == MVT::i64
6191 && "ExpandREAD_REGISTER called for non-i64 type result.");
6192
6193 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6194 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6195 N->getOperand(0),
6196 N->getOperand(1));
6197
6198 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6199 Read.getValue(1)));
6200 Results.push_back(Read.getOperand(0));
6201}
6202
6203/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6204/// When \p DstVT, the destination type of \p BC, is on the vector
6205/// register bank and the source of bitcast, \p Op, operates on the same bank,
6206/// it might be possible to combine them, such that everything stays on the
6207/// vector register bank.
6208/// \return The node that would replace \p BC, if the combine
6209/// is possible.
6210static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6211 SelectionDAG &DAG) {
6212 SDValue Op = BC->getOperand(0);
6213 EVT DstVT = BC->getValueType(0);
6214
6215 // The only vector instruction that can produce a scalar (remember,
6216 // since the bitcast was about to be turned into VMOVDRR, the source
6217 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6218 // Moreover, we can do this combine only if there is one use.
6219 // Finally, if the destination type is not a vector, there is not
6220 // much point in forcing everything onto the vector bank.
6221 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6222 !Op.hasOneUse())
6223 return SDValue();
6224
6225 // If the index is not constant, we will introduce an additional
6226 // multiply that will stick.
6227 // Give up in that case.
6228 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6229 if (!Index)
6230 return SDValue();
6231 unsigned DstNumElt = DstVT.getVectorNumElements();
6232
6233 // Compute the new index.
6234 const APInt &APIntIndex = Index->getAPIntValue();
6235 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6236 NewIndex *= APIntIndex;
6237 // Check if the new constant index fits into i32.
6238 if (NewIndex.getBitWidth() > 32)
6239 return SDValue();
6240
6241 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6242 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
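// Illustrative instance (added annotation, not in the original source):
//   v2f32 (bitcast (i64 extractelt v2i64 %src, 1))
// becomes
//   v2f32 (extract_subvector (v4f32 bitcast %src), 2)
// since M = 2 and the new index is 1 * 2 = 2.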
6243 SDLoc dl(Op);
6244 SDValue ExtractSrc = Op.getOperand(0);
6245 EVT VecVT = EVT::getVectorVT(
6246 *DAG.getContext(), DstVT.getScalarType(),
6247 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6248 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6249 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6250 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6251}
6252
6253/// ExpandBITCAST - If the target supports VFP, this function is called to
6254/// expand a bit convert where either the source or destination type is i64 to
6255/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6256/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6257/// vectors), since the legalizer won't know what to do with that.
6258SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6259 const ARMSubtarget *Subtarget) const {
6260 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6261 SDLoc dl(N);
6262 SDValue Op = N->getOperand(0);
6263
6264 // This function is only supposed to be called for i16 and i64 types, either
6265 // as the source or destination of the bit convert.
6266 EVT SrcVT = Op.getValueType();
6267 EVT DstVT = N->getValueType(0);
6268
6269 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6270 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6271 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6272 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6273
6274 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6275 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6276 return DAG.getNode(
6277 ISD::TRUNCATE, SDLoc(N), DstVT,
6278 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6279
6280 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6281 return SDValue();
6282
6283 // Turn i64->f64 into VMOVDRR.
6284 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6285 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6286 // if we can combine the bitcast with its source.
6287 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6288 return Val;
6289 SDValue Lo, Hi;
6290 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6291 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6292 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6293 }
6294
6295 // Turn f64->i64 into VMOVRRD.
6296 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6297 SDValue Cvt;
6298 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6299 SrcVT.getVectorNumElements() > 1)
6300 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6301 DAG.getVTList(MVT::i32, MVT::i32),
6302 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6303 else
6304 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6305 DAG.getVTList(MVT::i32, MVT::i32), Op);
6306 // Merge the pieces into a single i64 value.
6307 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6308 }
6309
6310 return SDValue();
6311}
6312
6313/// getZeroVector - Returns a vector of specified type with all zero elements.
6314/// Zero vectors are used to represent vector negation and in those cases
6315/// will be implemented with the NEON VNEG instruction. However, VNEG does
6316/// not support i64 elements, so sometimes the zero vectors will need to be
6317/// explicitly constructed. Regardless, use a canonical VMOV to create the
6318/// zero vector.
6319static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6320 assert(VT.isVector() && "Expected a vector type");
6321 // The canonical modified immediate encoding of a zero vector is....0!
6322 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6323 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6324 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6325 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6326}
6327
6328/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6329/// i32 values and take a 2 x i32 value to shift plus a shift amount.
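/// Informally (added annotation, not in the original source), for a shift
/// amount Amt < 32 this computes
///   Lo = (Lo >> Amt) | (Hi << (32 - Amt)),  Hi = Hi >> Amt
/// and for Amt >= 32 it computes
///   Lo = Hi >> (Amt - 32),  Hi = 0 (or the sign bits for SRA),
/// with the two cases selected by conditional moves on Amt - 32 >= 0.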
6330SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6331 SelectionDAG &DAG) const {
6332 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6333 EVT VT = Op.getValueType();
6334 unsigned VTBits = VT.getSizeInBits();
6335 SDLoc dl(Op);
6336 SDValue ShOpLo = Op.getOperand(0);
6337 SDValue ShOpHi = Op.getOperand(1);
6338 SDValue ShAmt = Op.getOperand(2);
6339 SDValue ARMcc;
6340 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6341 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6342
6343 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6344
6345 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6346 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6347 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6348 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6349 DAG.getConstant(VTBits, dl, MVT::i32));
6350 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6351 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6352 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6353 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6354 ISD::SETGE, ARMcc, DAG, dl);
6355 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6356 ARMcc, CCR, CmpLo);
6357
6358 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6359 SDValue HiBigShift = Opc == ISD::SRA
6360 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6361 DAG.getConstant(VTBits - 1, dl, VT))
6362 : DAG.getConstant(0, dl, VT);
6363 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6364 ISD::SETGE, ARMcc, DAG, dl);
6365 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6366 ARMcc, CCR, CmpHi);
6367
6368 SDValue Ops[2] = { Lo, Hi };
6369 return DAG.getMergeValues(Ops, dl);
6370}
6371
6372/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6373/// i32 values and take a 2 x i32 value to shift plus a shift amount.
6374SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6375 SelectionDAG &DAG) const {
6376 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6377 EVT VT = Op.getValueType();
6378 unsigned VTBits = VT.getSizeInBits();
6379 SDLoc dl(Op);
6380 SDValue ShOpLo = Op.getOperand(0);
6381 SDValue ShOpHi = Op.getOperand(1);
6382 SDValue ShAmt = Op.getOperand(2);
6383 SDValue ARMcc;
6384 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6385
6386 assert(Op.getOpcode() == ISD::SHL_PARTS);
6387 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6388 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6389 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6390 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6391 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6392
6393 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6394 DAG.getConstant(VTBits, dl, MVT::i32));
6395 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6396 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6397 ISD::SETGE, ARMcc, DAG, dl);
6398 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6399 ARMcc, CCR, CmpHi);
6400
6401 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6402 ISD::SETGE, ARMcc, DAG, dl);
6403 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6404 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6405 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6406
6407 SDValue Ops[2] = { Lo, Hi };
6408 return DAG.getMergeValues(Ops, dl);
6409}
6410
6411SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6412 SelectionDAG &DAG) const {
6413 // The rounding mode is in bits 23:22 of the FPSCR.
6414 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6415 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
6416 // so that the shift + and get folded into a bitfield extract.
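// Worked example (added annotation, not in the original source): if
// FPSCR[23:22] is 3 (round toward zero), ((3 + 1) & 3) == 0, the FLT_ROUNDS
// value for "toward zero"; if it is 0 (round to nearest), the result is 1,
// matching the 0->1, 1->2, 2->3, 3->0 mapping above.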
6417 SDLoc dl(Op);
6418 SDValue Chain = Op.getOperand(0);
6419 SDValue Ops[] = {Chain,
6420 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6421
6422 SDValue FPSCR =
6423 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6424 Chain = FPSCR.getValue(1);
6425 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6426 DAG.getConstant(1U << 22, dl, MVT::i32));
6427 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6428 DAG.getConstant(22, dl, MVT::i32));
6429 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6430 DAG.getConstant(3, dl, MVT::i32));
6431 return DAG.getMergeValues({And, Chain}, dl);
6432}
6433
6434SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6435 SelectionDAG &DAG) const {
6436 SDLoc DL(Op);
6437 SDValue Chain = Op->getOperand(0);
6438 SDValue RMValue = Op->getOperand(1);
6439
6440 // The rounding mode is in bits 23:22 of the FPSCR.
6441 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6442 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6443 // (((arg - 1) & 3) << 22).
6444 //
6445 // It is expected that the argument of llvm.set.rounding is within the
6446 // range [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6447 // responsibility of the code that generates llvm.set.rounding to ensure
6448 // this condition.
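// Worked example (added annotation, not in the original source): an argument
// of 1 (round to nearest) gives ((1 - 1) & 3) == 0, the ARM RN encoding; an
// argument of 0 (round toward zero) gives ((0 - 1) & 3) == 3, the ARM RZ
// encoding, matching the 0->3, 1->0, 2->1, 3->2 mapping above.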
6449
6450 // Calculate new value of FPSCR[23:22].
6451 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6452 DAG.getConstant(1, DL, MVT::i32));
6453 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6454 DAG.getConstant(0x3, DL, MVT::i32));
6455 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6456 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6457
6458 // Get current value of FPSCR.
6459 SDValue Ops[] = {Chain,
6460 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6461 SDValue FPSCR =
6462 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6463 Chain = FPSCR.getValue(1);
6464 FPSCR = FPSCR.getValue(0);
6465
6466 // Put new rounding mode into FPSCR[23:22].
6467 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6468 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6469 DAG.getConstant(RMMask, DL, MVT::i32));
6470 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6471 SDValue Ops2[] = {
6472 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6473 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6474}
6475
6476SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6477 SelectionDAG &DAG) const {
6478 SDLoc DL(Op);
6479 SDValue Chain = Op->getOperand(0);
6480 SDValue Mode = Op->getOperand(1);
6481
6482 // Generate nodes to build:
6483 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6484 SDValue Ops[] = {Chain,
6485 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6486 SDValue FPSCR =
6487 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6488 Chain = FPSCR.getValue(1);
6489 FPSCR = FPSCR.getValue(0);
6490
6491 SDValue FPSCRMasked =
6492 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6493 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6494 SDValue InputMasked =
6495 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6496 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6497 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6498
6499 SDValue Ops2[] = {
6500 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6501 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6502}
6503
6504SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6505 SelectionDAG &DAG) const {
6506 SDLoc DL(Op);
6507 SDValue Chain = Op->getOperand(0);
6508
6509 // To get the default FP mode all control bits are cleared:
6510 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6511 SDValue Ops[] = {Chain,
6512 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6513 SDValue FPSCR =
6514 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6515 Chain = FPSCR.getValue(1);
6516 FPSCR = FPSCR.getValue(0);
6517
6518 SDValue FPSCRMasked = DAG.getNode(
6519 ISD::AND, DL, MVT::i32, FPSCR,
6520 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6521 SDValue Ops2[] = {Chain,
6522 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6523 FPSCRMasked};
6524 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6525}
6526
6527static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6528 const ARMSubtarget *ST) {
6529 SDLoc dl(N);
6530 EVT VT = N->getValueType(0);
6531 if (VT.isVector() && ST->hasNEON()) {
6532
6533 // Compute the least significant set bit: LSB = X & -X
6534 SDValue X = N->getOperand(0);
6535 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6536 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6537
6538 EVT ElemTy = VT.getVectorElementType();
6539
6540 if (ElemTy == MVT::i8) {
6541 // Compute with: cttz(x) = ctpop(lsb - 1)
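// Worked example (added annotation, not in the original source):
//   x = 0b00101000 -> lsb = 0b00001000, lsb - 1 = 0b00000111,
//   ctpop(0b00000111) = 3 = cttz(x).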
6542 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6543 DAG.getTargetConstant(1, dl, ElemTy));
6544 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6545 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6546 }
6547
6548 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6549 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6550 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6551 unsigned NumBits = ElemTy.getSizeInBits();
6552 SDValue WidthMinus1 =
6553 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6554 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6555 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6556 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6557 }
6558
6559 // Compute with: cttz(x) = ctpop(lsb - 1)
6560
6561 // Compute LSB - 1.
6562 SDValue Bits;
6563 if (ElemTy == MVT::i64) {
6564 // Load constant 0xffff'ffff'ffff'ffff to register.
6565 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6566 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6567 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6568 } else {
6569 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6570 DAG.getTargetConstant(1, dl, ElemTy));
6571 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6572 }
6573 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6574 }
6575
6576 if (!ST->hasV6T2Ops())
6577 return SDValue();
6578
6579 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6580 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6581}
6582
6583static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6584 const ARMSubtarget *ST) {
6585 EVT VT = N->getValueType(0);
6586 SDLoc DL(N);
6587
6588 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6589 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6590 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6591 "Unexpected type for custom ctpop lowering");
6592
6593 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6594 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6595 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6596 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6597
6598 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
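// Illustrative note (added annotation, not in the original source): for a
// v4i32 ctpop this is roughly VCNT.8 on the v16i8 bitcast, then VPADDL.U8
// to v8i16 and VPADDL.U16 to v4i32, accumulating the per-byte counts into
// each 32-bit element.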
6599 unsigned EltSize = 8;
6600 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6601 while (EltSize != VT.getScalarSizeInBits()) {
6602 SmallVector<SDValue, 8> Ops;
6603 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6604 TLI.getPointerTy(DAG.getDataLayout())));
6605 Ops.push_back(Res);
6606
6607 EltSize *= 2;
6608 NumElts /= 2;
6609 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6610 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6611 }
6612
6613 return Res;
6614}
6615
6616/// getVShiftImm - Check if this is a valid build_vector for the immediate
6617/// operand of a vector shift operation, where all the elements of the
6618/// build_vector must have the same constant integer value.
6619static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6620 // Ignore bit_converts.
6621 while (Op.getOpcode() == ISD::BITCAST)
6622 Op = Op.getOperand(0);
6623 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6624 APInt SplatBits, SplatUndef;
6625 unsigned SplatBitSize;
6626 bool HasAnyUndefs;
6627 if (!BVN ||
6628 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6629 ElementBits) ||
6630 SplatBitSize > ElementBits)
6631 return false;
6632 Cnt = SplatBits.getSExtValue();
6633 return true;
6634}
6635
6636/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6637/// operand of a vector shift left operation. That value must be in the range:
6638/// 0 <= Value < ElementBits for a left shift; or
6639/// 0 <= Value <= ElementBits for a long left shift.
6640static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6641 assert(VT.isVector() && "vector shift count is not a vector type");
6642 int64_t ElementBits = VT.getScalarSizeInBits();
6643 if (!getVShiftImm(Op, ElementBits, Cnt))
6644 return false;
6645 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6646}
6647
6648/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6649/// operand of a vector shift right operation. For a shift opcode, the value
6650 /// is positive, but for an intrinsic the value must be negative. The
6651/// absolute value must be in the range:
6652/// 1 <= |Value| <= ElementBits for a right shift; or
6653/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6654static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6655 int64_t &Cnt) {
6656 assert(VT.isVector() && "vector shift count is not a vector type");
6657 int64_t ElementBits = VT.getScalarSizeInBits();
6658 if (!getVShiftImm(Op, ElementBits, Cnt))
6659 return false;
6660 if (!isIntrinsic)
6661 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6662 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6663 Cnt = -Cnt;
6664 return true;
6665 }
6666 return false;
6667}
6668
6669static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6670 const ARMSubtarget *ST) {
6671 EVT VT = N->getValueType(0);
6672 SDLoc dl(N);
6673 int64_t Cnt;
6674
6675 if (!VT.isVector())
6676 return SDValue();
6677
6678 // We essentially have two forms here. Shift by an immediate and shift by a
6679 // vector register (there is also a shift by a GPR, but that is just handled
6680 // with a tablegen pattern). We cannot easily match shift by an immediate in
6681 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6682 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6683 // signed or unsigned, and a negative shift indicates a shift right).
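// Illustrative instance (added annotation, not in the original source):
//   (srl v4i32 %x, %amt)  becomes  VSHLu(%x, 0 - %amt)
// because VSHL with a negative per-lane count performs a right shift.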
6684 if (N->getOpcode() == ISD::SHL) {
6685 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6686 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6687 DAG.getConstant(Cnt, dl, MVT::i32));
6688 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6689 N->getOperand(1));
6690 }
6691
6692 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6693 "unexpected vector shift opcode");
6694
6695 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6696 unsigned VShiftOpc =
6697 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6698 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6699 DAG.getConstant(Cnt, dl, MVT::i32));
6700 }
6701
6702 // Other right shifts we don't have operations for (we use a shift left by a
6703 // negative number).
6704 EVT ShiftVT = N->getOperand(1).getValueType();
6705 SDValue NegatedCount = DAG.getNode(
6706 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6707 unsigned VShiftOpc =
6708 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6709 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6710}
6711
6712static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6713 const ARMSubtarget *ST) {
6714 EVT VT = N->getValueType(0);
6715 SDLoc dl(N);
6716
6717 // We can get here for a node like i32 = ISD::SHL i32, i64
6718 if (VT != MVT::i64)
6719 return SDValue();
6720
6721 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6722 N->getOpcode() == ISD::SHL) &&
6723 "Unknown shift to lower!");
6724
6725 unsigned ShOpc = N->getOpcode();
6726 if (ST->hasMVEIntegerOps()) {
6727 SDValue ShAmt = N->getOperand(1);
6728 unsigned ShPartsOpc = ARMISD::LSLL;
6729 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6730
6731 // If the shift amount is a constant that is zero or at least 32, or is a
6732 // non-constant wider than 64 bits, then fall back to the default expansion
6733 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6734 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6735 return SDValue();
6736
6737 // Extract the lower 32 bits of the shift amount if it's not an i32
6738 if (ShAmt->getValueType(0) != MVT::i32)
6739 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6740
6741 if (ShOpc == ISD::SRL) {
6742 if (!Con)
6743 // There is no t2LSRLr instruction so negate and perform an lsll if the
6744 // shift amount is in a register, emulating a right shift.
6745 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6746 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6747 else
6748 // Else generate an lsrl on the immediate shift amount
6749 ShPartsOpc = ARMISD::LSRL;
6750 } else if (ShOpc == ISD::SRA)
6751 ShPartsOpc = ARMISD::ASRL;
6752
6753 // Split Lower/Upper 32 bits of the destination/source
6754 SDValue Lo, Hi;
6755 std::tie(Lo, Hi) =
6756 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6757 // Generate the shift operation as computed above
6758 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6759 ShAmt);
6760 // The upper 32 bits come from the second return value of lsll
6761 Hi = SDValue(Lo.getNode(), 1);
6762 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6763 }
6764
6765 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6766 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6767 return SDValue();
6768
6769 // If we are in Thumb1 mode, we don't have RRX.
6770 if (ST->isThumb1Only())
6771 return SDValue();
6772
6773 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
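// Illustrative note (added annotation, not in the original source): for
// (srl i64 x, 1) with x split as (Hi, Lo),
//   Hi' = Hi >> 1                      (the shifted-out bit sets the carry)
//   Lo' = RRX(Lo) = (carry << 31) | (Lo >> 1)
// and (Hi', Lo') is the 64-bit result.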
6774 SDValue Lo, Hi;
6775 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6776
6777 // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
6778 // captures the result into a carry flag.
6779 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6780 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6781
6782 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6783 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6784
6785 // Merge the pieces into a single i64 value.
6786 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6787}
6788
6789static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6790 const ARMSubtarget *ST) {
6791 bool Invert = false;
6792 bool Swap = false;
6793 unsigned Opc = ARMCC::AL;
6794
6795 SDValue Op0 = Op.getOperand(0);
6796 SDValue Op1 = Op.getOperand(1);
6797 SDValue CC = Op.getOperand(2);
6798 EVT VT = Op.getValueType();
6799 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6800 SDLoc dl(Op);
6801
6802 EVT CmpVT;
6803 if (ST->hasNEON())
6804 CmpVT = VT.changeVectorElementTypeToInteger();
6805 else {
6806 assert(ST->hasMVEIntegerOps() &&
6807 "No hardware support for integer vector comparison!");
6808
6809 if (Op.getValueType().getVectorElementType() != MVT::i1)
6810 return SDValue();
6811
6812 // Make sure we expand floating point setcc to scalar if we do not have
6813 // mve.fp, so that we can handle them from there.
6814 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6815 return SDValue();
6816
6817 CmpVT = VT;
6818 }
6819
6820 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6821 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6822 // Special-case integer 64-bit equality comparisons. They aren't legal,
6823 // but they can be lowered with a few vector instructions.
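// Illustrative note (added annotation, not in the original source): each
// 64-bit lane is compared as two 32-bit lanes, then the result is ANDed
// with its own VREV64 so a lane reads all-ones only when both halves
// matched; SETNE additionally inverts the merged result.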
6824 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6825 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6826 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6827 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6828 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6829 DAG.getCondCode(ISD::SETEQ));
6830 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6831 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6832 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6833 if (SetCCOpcode == ISD::SETNE)
6834 Merged = DAG.getNOT(dl, Merged, CmpVT);
6835 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6836 return Merged;
6837 }
6838
6839 if (CmpVT.getVectorElementType() == MVT::i64)
6840 // 64-bit comparisons are not legal in general.
6841 return SDValue();
6842
6843 if (Op1.getValueType().isFloatingPoint()) {
6844 switch (SetCCOpcode) {
6845 default: llvm_unreachable("Illegal FP comparison");
6846 case ISD::SETUNE:
6847 case ISD::SETNE:
6848 if (ST->hasMVEFloatOps()) {
6849 Opc = ARMCC::NE; break;
6850 } else {
6851 Invert = true; [[fallthrough]];
6852 }
6853 case ISD::SETOEQ:
6854 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6855 case ISD::SETOLT:
6856 case ISD::SETLT: Swap = true; [[fallthrough]];
6857 case ISD::SETOGT:
6858 case ISD::SETGT: Opc = ARMCC::GT; break;
6859 case ISD::SETOLE:
6860 case ISD::SETLE: Swap = true; [[fallthrough]];
6861 case ISD::SETOGE:
6862 case ISD::SETGE: Opc = ARMCC::GE; break;
6863 case ISD::SETUGE: Swap = true; [[fallthrough]];
6864 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6865 case ISD::SETUGT: Swap = true; [[fallthrough]];
6866 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6867 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6868 case ISD::SETONE: {
6869 // Expand this to (OLT | OGT).
6870 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6871 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6872 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6873 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6874 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6875 if (Invert)
6876 Result = DAG.getNOT(dl, Result, VT);
6877 return Result;
6878 }
6879 case ISD::SETUO: Invert = true; [[fallthrough]];
6880 case ISD::SETO: {
6881 // Expand this to (OLT | OGE).
6882 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6883 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6884 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6885 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6886 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6887 if (Invert)
6888 Result = DAG.getNOT(dl, Result, VT);
6889 return Result;
6890 }
6891 }
6892 } else {
6893 // Integer comparisons.
6894 switch (SetCCOpcode) {
6895 default: llvm_unreachable("Illegal integer comparison");
6896 case ISD::SETNE:
6897 if (ST->hasMVEIntegerOps()) {
6898 Opc = ARMCC::NE; break;
6899 } else {
6900 Invert = true; [[fallthrough]];
6901 }
6902 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6903 case ISD::SETLT: Swap = true; [[fallthrough]];
6904 case ISD::SETGT: Opc = ARMCC::GT; break;
6905 case ISD::SETLE: Swap = true; [[fallthrough]];
6906 case ISD::SETGE: Opc = ARMCC::GE; break;
6907 case ISD::SETULT: Swap = true; [[fallthrough]];
6908 case ISD::SETUGT: Opc = ARMCC::HI; break;
6909 case ISD::SETULE: Swap = true; [[fallthrough]];
6910 case ISD::SETUGE: Opc = ARMCC::HS; break;
6911 }
6912
6913 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6914 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6915 SDValue AndOp;
6916 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6917 AndOp = Op0;
6918 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6919 AndOp = Op1;
6920
6921 // Ignore bitconvert.
6922 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6923 AndOp = AndOp.getOperand(0);
6924
6925 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6926 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6927 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6928 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6929 if (!Invert)
6930 Result = DAG.getNOT(dl, Result, VT);
6931 return Result;
6932 }
6933 }
6934 }
6935
6936 if (Swap)
6937 std::swap(Op0, Op1);
6938
6939 // If one of the operands is a constant vector zero, attempt to fold the
6940 // comparison to a specialized compare-against-zero form.
6941 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6942 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6943 Opc == ARMCC::NE)) {
6944 if (Opc == ARMCC::GE)
6945 Opc = ARMCC::LE;
6946 else if (Opc == ARMCC::GT)
6947 Opc = ARMCC::LT;
6948 std::swap(Op0, Op1);
6949 }
6950
6951 SDValue Result;
6952 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6953 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6954 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6955 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6956 DAG.getConstant(Opc, dl, MVT::i32));
6957 else
6958 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6959 DAG.getConstant(Opc, dl, MVT::i32));
6960
6961 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6962
6963 if (Invert)
6964 Result = DAG.getNOT(dl, Result, VT);
6965
6966 return Result;
6967}
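// A standalone scalar model of the SETEQ-on-v*i64 trick above (illustrative
// only; assumes <cstdint>): compare the two 32-bit halves separately, then
// AND each half's result with its VREV64-swapped neighbour so a 64-bit lane
// is all-ones only when both halves matched.
static inline uint64_t Eq64ViaEq32Sketch(uint64_t A, uint64_t B) {
  uint32_t LoMask = (uint32_t)A == (uint32_t)B ? 0xffffffffu : 0u;
  uint32_t HiMask = (uint32_t)(A >> 32) == (uint32_t)(B >> 32) ? 0xffffffffu : 0u;
  uint64_t Cmp = ((uint64_t)HiMask << 32) | LoMask; // per-half SETEQ result
  uint64_t Rev = ((uint64_t)LoMask << 32) | HiMask; // VREV64 of that result
  return Cmp & Rev;                                 // all-ones iff A == B
}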
6968
6969static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6970 SDValue LHS = Op.getOperand(0);
6971 SDValue RHS = Op.getOperand(1);
6972 SDValue Carry = Op.getOperand(2);
6973 SDValue Cond = Op.getOperand(3);
6974 SDLoc DL(Op);
6975
6976 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6977
6978 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
6979 // have to invert the carry first.
6980 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6981 DAG.getConstant(1, DL, MVT::i32), Carry);
6982 // This converts the boolean value carry into the carry flag.
6983 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6984
6985 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6986 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6987
6988 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6989 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6990 SDValue ARMcc = DAG.getConstant(
6991 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6992 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6993 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6994 Cmp.getValue(1), SDValue());
6995 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6996 CCR, Chain.getValue(1));
6997}
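// A one-line model of the borrow/carry mismatch handled above (illustrative
// only): ISD::USUBO_CARRY passes a borrow (1 means "borrow taken"), while
// ARM's SBC-style SUBE consumes a carry flag where 1 means "no borrow".
static inline unsigned BorrowToARMCarrySketch(unsigned Borrow) {
  return 1u - Borrow; // matches the (1 - Carry) computed above
}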
6998
6999/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
7000/// valid vector constant for a NEON or MVE instruction with a "modified
7001/// immediate" operand (e.g., VMOV). If so, return the encoded value.
7002static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
7003 unsigned SplatBitSize, SelectionDAG &DAG,
7004 const SDLoc &dl, EVT &VT, EVT VectorVT,
7005 VMOVModImmType type) {
7006 unsigned OpCmode, Imm;
7007 bool is128Bits = VectorVT.is128BitVector();
7008
7009 // SplatBitSize is set to the smallest size that splats the vector, so a
7010 // zero vector will always have SplatBitSize == 8. However, NEON modified
7011 // immediate instructions other than VMOV do not support the 8-bit encoding
7012 // of a zero vector, and the default encoding of zero is supposed to be the
7013 // 32-bit version.
7014 if (SplatBits == 0)
7015 SplatBitSize = 32;
7016
7017 switch (SplatBitSize) {
7018 case 8:
7019 if (type != VMOVModImm)
7020 return SDValue();
7021 // Any 1-byte value is OK. Op=0, Cmode=1110.
7022 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
7023 OpCmode = 0xe;
7024 Imm = SplatBits;
7025 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
7026 break;
7027
7028 case 16:
7029 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
7030 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
7031 if ((SplatBits & ~0xff) == 0) {
7032 // Value = 0x00nn: Op=x, Cmode=100x.
7033 OpCmode = 0x8;
7034 Imm = SplatBits;
7035 break;
7036 }
7037 if ((SplatBits & ~0xff00) == 0) {
7038 // Value = 0xnn00: Op=x, Cmode=101x.
7039 OpCmode = 0xa;
7040 Imm = SplatBits >> 8;
7041 break;
7042 }
7043 return SDValue();
7044
7045 case 32:
7046 // NEON's 32-bit VMOV supports splat values where:
7047 // * only one byte is nonzero, or
7048 // * the least significant byte is 0xff and the second byte is nonzero, or
7049 // * the least significant 2 bytes are 0xff and the third is nonzero.
7050 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7051 if ((SplatBits & ~0xff) == 0) {
7052 // Value = 0x000000nn: Op=x, Cmode=000x.
7053 OpCmode = 0;
7054 Imm = SplatBits;
7055 break;
7056 }
7057 if ((SplatBits & ~0xff00) == 0) {
7058 // Value = 0x0000nn00: Op=x, Cmode=001x.
7059 OpCmode = 0x2;
7060 Imm = SplatBits >> 8;
7061 break;
7062 }
7063 if ((SplatBits & ~0xff0000) == 0) {
7064 // Value = 0x00nn0000: Op=x, Cmode=010x.
7065 OpCmode = 0x4;
7066 Imm = SplatBits >> 16;
7067 break;
7068 }
7069 if ((SplatBits & ~0xff000000) == 0) {
7070 // Value = 0xnn000000: Op=x, Cmode=011x.
7071 OpCmode = 0x6;
7072 Imm = SplatBits >> 24;
7073 break;
7074 }
7075
7076 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7077 if (type == OtherModImm) return SDValue();
7078
7079 if ((SplatBits & ~0xffff) == 0 &&
7080 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7081 // Value = 0x0000nnff: Op=x, Cmode=1100.
7082 OpCmode = 0xc;
7083 Imm = SplatBits >> 8;
7084 break;
7085 }
7086
7087 // cmode == 0b1101 is not supported for MVE VMVN
7088 if (type == MVEVMVNModImm)
7089 return SDValue();
7090
7091 if ((SplatBits & ~0xffffff) == 0 &&
7092 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7093 // Value = 0x00nnffff: Op=x, Cmode=1101.
7094 OpCmode = 0xd;
7095 Imm = SplatBits >> 16;
7096 break;
7097 }
7098
7099 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7100 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7101 // VMOV.I32. A (very) minor optimization would be to replicate the value
7102 // and fall through here to test for a valid 64-bit splat. But, then the
7103 // caller would also need to check and handle the change in size.
7104 return SDValue();
7105
7106 case 64: {
7107 if (type != VMOVModImm)
7108 return SDValue();
7109 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7110 uint64_t BitMask = 0xff;
7111 unsigned ImmMask = 1;
7112 Imm = 0;
7113 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7114 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7115 Imm |= ImmMask;
7116 } else if ((SplatBits & BitMask) != 0) {
7117 return SDValue();
7118 }
7119 BitMask <<= 8;
7120 ImmMask <<= 1;
7121 }
7122
7123 if (DAG.getDataLayout().isBigEndian()) {
7124 // Reverse the order of elements within the vector.
7125 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7126 unsigned Mask = (1 << BytesPerElem) - 1;
7127 unsigned NumElems = 8 / BytesPerElem;
7128 unsigned NewImm = 0;
7129 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7130 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7131 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7132 }
7133 Imm = NewImm;
7134 }
7135
7136 // Op=1, Cmode=1110.
7137 OpCmode = 0x1e;
7138 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7139 break;
7140 }
7141
7142 default:
7143 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7144 }
7145
7146 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7147 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7148}
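// A compact restatement of the 32-bit classification above (illustrative
// only; ClassifySplat32Sketch is a hypothetical helper, not the real
// encoder): a 32-bit splat with exactly one non-zero byte selects the cmode
// for that byte position, e.g. 0x00ab0000 takes Cmode=010x with Imm=0xab.
static inline bool ClassifySplat32Sketch(unsigned SplatBits, unsigned &OpCmode,
                                         unsigned &Imm) {
  if ((SplatBits & ~0xffu) == 0)       { OpCmode = 0x0; Imm = SplatBits;       return true; }
  if ((SplatBits & ~0xff00u) == 0)     { OpCmode = 0x2; Imm = SplatBits >> 8;  return true; }
  if ((SplatBits & ~0xff0000u) == 0)   { OpCmode = 0x4; Imm = SplatBits >> 16; return true; }
  if ((SplatBits & ~0xff000000u) == 0) { OpCmode = 0x6; Imm = SplatBits >> 24; return true; }
  return false; // the 0x0000nnff / 0x00nnffff forms and failures are omitted
}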
7149
7150SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7151 const ARMSubtarget *ST) const {
7152 EVT VT = Op.getValueType();
7153 bool IsDouble = (VT == MVT::f64);
7154 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7155 const APFloat &FPVal = CFP->getValueAPF();
7156
7157 // Prevent floating-point constants from using literal loads
7158 // when execute-only is enabled.
7159 if (ST->genExecuteOnly()) {
7160 // We shouldn't trigger this for v6m execute-only
7161 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7162 "Unexpected architecture");
7163
7164 // If we can represent the constant as an immediate, don't lower it
7165 if (isFPImmLegal(FPVal, VT))
7166 return Op;
7167 // Otherwise, construct as integer, and move to float register
7168 APInt INTVal = FPVal.bitcastToAPInt();
7169 SDLoc DL(CFP);
7170 switch (VT.getSimpleVT().SimpleTy) {
7171 default:
7172 llvm_unreachable("Unknown floating point type!");
7173 break;
7174 case MVT::f64: {
7175 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7176 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7177 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7178 }
7179 case MVT::f32:
7180 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7181 DAG.getConstant(INTVal, DL, MVT::i32));
7182 }
7183 }
7184
7185 if (!ST->hasVFP3Base())
7186 return SDValue();
7187
7188 // Use the default (constant pool) lowering for double constants when we have
7189 // an SP-only FPU
7190 if (IsDouble && !Subtarget->hasFP64())
7191 return SDValue();
7192
7193 // Try splatting with a VMOV.f32...
7194 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7195
7196 if (ImmVal != -1) {
7197 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7198 // We have code in place to select a valid ConstantFP already, no need to
7199 // do any mangling.
7200 return Op;
7201 }
7202
7203 // It's a float and we are trying to use NEON operations where
7204 // possible. Lower it to a splat followed by an extract.
7205 SDLoc DL(Op);
7206 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7207 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7208 NewVal);
7209 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7210 DAG.getConstant(0, DL, MVT::i32));
7211 }
7212
7213 // The rest of our options are NEON only, make sure that's allowed before
7214 // proceeding..
7215 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7216 return SDValue();
7217
7218 EVT VMovVT;
7219 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7220
7221 // It wouldn't really be worth bothering for doubles except for one very
7222 // important value, which does happen to match: 0.0. So make sure we don't do
7223 // anything stupid.
7224 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7225 return SDValue();
7226
7227 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7228 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7229 VMovVT, VT, VMOVModImm);
7230 if (NewVal != SDValue()) {
7231 SDLoc DL(Op);
7232 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7233 NewVal);
7234 if (IsDouble)
7235 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7236
7237 // It's a float: cast and extract a vector element.
7238 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7239 VecConstant);
7240 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7241 DAG.getConstant(0, DL, MVT::i32));
7242 }
7243
7244 // Finally, try a VMVN.i32
7245 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7246 VT, VMVNModImm);
7247 if (NewVal != SDValue()) {
7248 SDLoc DL(Op);
7249 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7250
7251 if (IsDouble)
7252 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7253
7254 // It's a float: cast and extract a vector element.
7255 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7256 VecConstant);
7257 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7258 DAG.getConstant(0, DL, MVT::i32));
7259 }
7260
7261 return SDValue();
7262}
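// A plain C++ model of the execute-only f64 path above (illustrative only;
// assumes <cstdint> and <cstring>): the constant's bit pattern is split into
// the two 32-bit words that VMOVDRR moves into a D register.
static inline void SplitF64BitsSketch(double D, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof Bits); // same idea as FPVal.bitcastToAPInt()
  Lo = (uint32_t)Bits;                 // low word  -> first VMOVDRR operand
  Hi = (uint32_t)(Bits >> 32);         // high word -> second VMOVDRR operand
}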
7263
7264// Check if a VEXT instruction can handle the shuffle mask when the
7265// vector sources of the shuffle are the same.
7266static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7267 unsigned NumElts = VT.getVectorNumElements();
7268
7269 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7270 if (M[0] < 0)
7271 return false;
7272
7273 Imm = M[0];
7274
7275 // If this is a VEXT shuffle, the immediate value is the index of the first
7276 // element. The other shuffle indices must be the successive elements after
7277 // the first one.
7278 unsigned ExpectedElt = Imm;
7279 for (unsigned i = 1; i < NumElts; ++i) {
7280 // Increment the expected index. If it wraps around, just follow it
7281 // back to index zero and keep going.
7282 ++ExpectedElt;
7283 if (ExpectedElt == NumElts)
7284 ExpectedElt = 0;
7285
7286 if (M[i] < 0) continue; // ignore UNDEF indices
7287 if (ExpectedElt != static_cast<unsigned>(M[i]))
7288 return false;
7289 }
7290
7291 return true;
7292}
7293
7294static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7295 bool &ReverseVEXT, unsigned &Imm) {
7296 unsigned NumElts = VT.getVectorNumElements();
7297 ReverseVEXT = false;
7298
7299 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7300 if (M[0] < 0)
7301 return false;
7302
7303 Imm = M[0];
7304
7305 // If this is a VEXT shuffle, the immediate value is the index of the first
7306 // element. The other shuffle indices must be the successive elements after
7307 // the first one.
7308 unsigned ExpectedElt = Imm;
7309 for (unsigned i = 1; i < NumElts; ++i) {
7310 // Increment the expected index. If it wraps around, it may still be
7311 // a VEXT but the source vectors must be swapped.
7312 ExpectedElt += 1;
7313 if (ExpectedElt == NumElts * 2) {
7314 ExpectedElt = 0;
7315 ReverseVEXT = true;
7316 }
7317
7318 if (M[i] < 0) continue; // ignore UNDEF indices
7319 if (ExpectedElt != static_cast<unsigned>(M[i]))
7320 return false;
7321 }
7322
7323 // Adjust the index value if the source operands will be swapped.
7324 if (ReverseVEXT)
7325 Imm -= NumElts;
7326
7327 return true;
7328}
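// A standalone sketch of the index pattern accepted above (illustrative only;
// IsConsecutiveFromSketch is a hypothetical checker): a VEXT #Imm of two
// NumElts-wide vectors selects NumElts consecutive elements of their
// concatenation starting at Imm, e.g. <3,4,5,6,7,8,9,10> for v8i8 with
// Imm == 3, wrapping back to element 0 past the end of the second source.
static inline bool IsConsecutiveFromSketch(const int *M, unsigned NumElts,
                                           unsigned Imm) {
  for (unsigned i = 0; i < NumElts; ++i) {
    unsigned Expected = (Imm + i) % (2 * NumElts); // wrap as isVEXTMask does
    if (M[i] >= 0 && (unsigned)M[i] != Expected)
      return false;
  }
  return true;
}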
7329
7330static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7331 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7332 // range, then 0 is placed into the resulting vector. So pretty much any mask
7333 // of 8 elements can work here.
7334 return VT == MVT::v8i8 && M.size() == 8;
7335}
7336
7337static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7338 unsigned Index) {
7339 if (Mask.size() == Elements * 2)
7340 return Index / Elements;
7341 return Mask[Index] == 0 ? 0 : 1;
7342}
7343
7344// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7345// checking that pairs of elements in the shuffle mask represent the same index
7346// in each vector, incrementing the expected index by 2 at each step.
7347// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7348// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7349// v2={e,f,g,h}
7350// WhichResult gives the offset for each element in the mask based on which
7351// of the two results it belongs to.
7352//
7353// The transpose can be represented either as:
7354// result1 = shufflevector v1, v2, result1_shuffle_mask
7355// result2 = shufflevector v1, v2, result2_shuffle_mask
7356// where v1/v2 and the shuffle masks have the same number of elements
7357// (here WhichResult (see below) indicates which result is being checked)
7358//
7359// or as:
7360// results = shufflevector v1, v2, shuffle_mask
7361// where both results are returned in one vector and the shuffle mask has twice
7362// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
7363// want to check the low half and high half of the shuffle mask as if it were
7364// the other case
7365static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7366 unsigned EltSz = VT.getScalarSizeInBits();
7367 if (EltSz == 64)
7368 return false;
7369
7370 unsigned NumElts = VT.getVectorNumElements();
7371 if (M.size() != NumElts && M.size() != NumElts*2)
7372 return false;
7373
7374 // If the mask is twice as long as the input vector then we need to check the
7375 // upper and lower parts of the mask with a matching value for WhichResult
7376 // FIXME: A mask with only even values will be rejected in case the first
7377 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7378 // M[0] is used to determine WhichResult
7379 for (unsigned i = 0; i < M.size(); i += NumElts) {
7380 WhichResult = SelectPairHalf(NumElts, M, i);
7381 for (unsigned j = 0; j < NumElts; j += 2) {
7382 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7383 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7384 return false;
7385 }
7386 }
7387
7388 if (M.size() == NumElts*2)
7389 WhichResult = 0;
7390
7391 return true;
7392}
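// A small generator for the masks isVTRNMask accepts (illustrative only;
// MakeVTRNMaskSketch is a hypothetical helper): for v4i32 it produces
// [0,4,2,6] when WhichResult is 0 and [1,5,3,7] when WhichResult is 1.
static inline void MakeVTRNMaskSketch(unsigned NumElts, unsigned WhichResult,
                                      int *MaskOut) {
  for (unsigned j = 0; j < NumElts; j += 2) {
    MaskOut[j] = j + WhichResult;               // lane j of the first source
    MaskOut[j + 1] = j + NumElts + WhichResult; // matching lane of the second
  }
}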
7393
7394/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7395/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7396/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7397static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7398 unsigned EltSz = VT.getScalarSizeInBits();
7399 if (EltSz == 64)
7400 return false;
7401
7402 unsigned NumElts = VT.getVectorNumElements();
7403 if (M.size() != NumElts && M.size() != NumElts*2)
7404 return false;
7405
7406 for (unsigned i = 0; i < M.size(); i += NumElts) {
7407 WhichResult = SelectPairHalf(NumElts, M, i);
7408 for (unsigned j = 0; j < NumElts; j += 2) {
7409 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7410 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7411 return false;
7412 }
7413 }
7414
7415 if (M.size() == NumElts*2)
7416 WhichResult = 0;
7417
7418 return true;
7419}
7420
7421// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7422// that the mask elements are either all even and in steps of size 2 or all odd
7423// and in steps of size 2.
7424// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7425// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7426// v2={e,f,g,h}
7427// Requires similar checks to that of isVTRNMask with
7428// respect to how results are returned.
7429static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7430 unsigned EltSz = VT.getScalarSizeInBits();
7431 if (EltSz == 64)
7432 return false;
7433
7434 unsigned NumElts = VT.getVectorNumElements();
7435 if (M.size() != NumElts && M.size() != NumElts*2)
7436 return false;
7437
7438 for (unsigned i = 0; i < M.size(); i += NumElts) {
7439 WhichResult = SelectPairHalf(NumElts, M, i);
7440 for (unsigned j = 0; j < NumElts; ++j) {
7441 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7442 return false;
7443 }
7444 }
7445
7446 if (M.size() == NumElts*2)
7447 WhichResult = 0;
7448
7449 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7450 if (VT.is64BitVector() && EltSz == 32)
7451 return false;
7452
7453 return true;
7454}
7455
7456/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7457/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7458/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7459static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7460 unsigned EltSz = VT.getScalarSizeInBits();
7461 if (EltSz == 64)
7462 return false;
7463
7464 unsigned NumElts = VT.getVectorNumElements();
7465 if (M.size() != NumElts && M.size() != NumElts*2)
7466 return false;
7467
7468 unsigned Half = NumElts / 2;
7469 for (unsigned i = 0; i < M.size(); i += NumElts) {
7470 WhichResult = SelectPairHalf(NumElts, M, i);
7471 for (unsigned j = 0; j < NumElts; j += Half) {
7472 unsigned Idx = WhichResult;
7473 for (unsigned k = 0; k < Half; ++k) {
7474 int MIdx = M[i + j + k];
7475 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7476 return false;
7477 Idx += 2;
7478 }
7479 }
7480 }
7481
7482 if (M.size() == NumElts*2)
7483 WhichResult = 0;
7484
7485 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7486 if (VT.is64BitVector() && EltSz == 32)
7487 return false;
7488
7489 return true;
7490}
7491
7492// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7493// that pairs of elements of the shufflemask represent the same index in each
7494// vector incrementing sequentially through the vectors.
7495// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7496// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7497// v2={e,f,g,h}
7498// Requires similar checks to that of isVTRNMask with respect to how results
7499// are returned.
7500static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7501 unsigned EltSz = VT.getScalarSizeInBits();
7502 if (EltSz == 64)
7503 return false;
7504
7505 unsigned NumElts = VT.getVectorNumElements();
7506 if (M.size() != NumElts && M.size() != NumElts*2)
7507 return false;
7508
7509 for (unsigned i = 0; i < M.size(); i += NumElts) {
7510 WhichResult = SelectPairHalf(NumElts, M, i);
7511 unsigned Idx = WhichResult * NumElts / 2;
7512 for (unsigned j = 0; j < NumElts; j += 2) {
7513 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7514 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7515 return false;
7516 Idx += 1;
7517 }
7518 }
7519
7520 if (M.size() == NumElts*2)
7521 WhichResult = 0;
7522
7523 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7524 if (VT.is64BitVector() && EltSz == 32)
7525 return false;
7526
7527 return true;
7528}
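// Reference masks for the VUZP/VZIP checks above (illustrative only; both
// helpers are hypothetical): for v4i32 with WhichResult == 0 they produce
// [0,2,4,6] (unzip) and [0,4,1,5] (zip) respectively.
static inline void MakeVUZPMaskSketch(unsigned NumElts, unsigned WhichResult,
                                      int *Out) {
  for (unsigned j = 0; j < NumElts; ++j)
    Out[j] = 2 * j + WhichResult;      // all even (or all odd) lanes
}
static inline void MakeVZIPMaskSketch(unsigned NumElts, unsigned WhichResult,
                                      int *Out) {
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned j = 0; j < NumElts; j += 2) {
    Out[j] = Idx;                      // lane from the first source
    Out[j + 1] = Idx + NumElts;        // same lane from the second source
    ++Idx;
  }
}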
7529
7530/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7531/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7532/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7533static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7534 unsigned EltSz = VT.getScalarSizeInBits();
7535 if (EltSz == 64)
7536 return false;
7537
7538 unsigned NumElts = VT.getVectorNumElements();
7539 if (M.size() != NumElts && M.size() != NumElts*2)
7540 return false;
7541
7542 for (unsigned i = 0; i < M.size(); i += NumElts) {
7543 WhichResult = SelectPairHalf(NumElts, M, i);
7544 unsigned Idx = WhichResult * NumElts / 2;
7545 for (unsigned j = 0; j < NumElts; j += 2) {
7546 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7547 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7548 return false;
7549 Idx += 1;
7550 }
7551 }
7552
7553 if (M.size() == NumElts*2)
7554 WhichResult = 0;
7555
7556 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7557 if (VT.is64BitVector() && EltSz == 32)
7558 return false;
7559
7560 return true;
7561}
7562
7563/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7564/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7565static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7566 unsigned &WhichResult,
7567 bool &isV_UNDEF) {
7568 isV_UNDEF = false;
7569 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7570 return ARMISD::VTRN;
7571 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7572 return ARMISD::VUZP;
7573 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7574 return ARMISD::VZIP;
7575
7576 isV_UNDEF = true;
7577 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7578 return ARMISD::VTRN;
7579 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7580 return ARMISD::VUZP;
7581 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7582 return ARMISD::VZIP;
7583
7584 return 0;
7585}
7586
7587/// \return true if this is a reverse operation on a vector.
7588static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7589 unsigned NumElts = VT.getVectorNumElements();
7590 // Make sure the mask has the right size.
7591 if (NumElts != M.size())
7592 return false;
7593
7594 // Look for <15, ..., 3, -1, 1, 0>.
7595 for (unsigned i = 0; i != NumElts; ++i)
7596 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7597 return false;
7598
7599 return true;
7600}
7601
7602static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7603 unsigned NumElts = VT.getVectorNumElements();
7604 // Make sure the mask has the right size.
7605 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7606 return false;
7607
7608 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7609 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7610 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7611 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7612 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7613 int Ofs = Top ? 1 : 0;
7614 int Upper = SingleSource ? 0 : NumElts;
7615 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7616 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7617 return false;
7618 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7619 return false;
7620 }
7621 return true;
7622}
7623
7624static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7625 unsigned NumElts = VT.getVectorNumElements();
7626 // Make sure the mask has the right size.
7627 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7628 return false;
7629
7630 // If Top
7631 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7632 // This inserts Input2 into Input1
7633 // else if not Top
7634 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7635 // This inserts Input1 into Input2
7636 unsigned Offset = Top ? 0 : 1;
7637 unsigned N = SingleSource ? 0 : NumElts;
7638 for (unsigned i = 0; i < NumElts; i += 2) {
7639 if (M[i] >= 0 && M[i] != (int)i)
7640 return false;
7641 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7642 return false;
7643 }
7644
7645 return true;
7646}
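// Reference masks for the VMOVN check above (illustrative only; the helper
// is hypothetical): for two v8i16 sources, Top selects <0,8,2,10,4,12,6,14>
// (inserting the second source into the first), while !Top selects
// <0,9,2,11,4,13,6,15>.
static inline void MakeVMOVNMaskSketch(unsigned NumElts, bool Top, int *Out) {
  unsigned Offset = Top ? 0 : 1;
  for (unsigned i = 0; i < NumElts; i += 2) {
    Out[i] = i;                        // element kept from the first input
    Out[i + 1] = NumElts + i + Offset; // element inserted from the second
  }
}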
7647
7648static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7649 unsigned NumElts = ToVT.getVectorNumElements();
7650 if (NumElts != M.size())
7651 return false;
7652
7653 // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7654 // looking for patterns of:
7655 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7656 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7657
7658 unsigned Off0 = rev ? NumElts / 2 : 0;
7659 unsigned Off1 = rev ? 0 : NumElts / 2;
7660 for (unsigned i = 0; i < NumElts; i += 2) {
7661 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7662 return false;
7663 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7664 return false;
7665 }
7666
7667 return true;
7668}
7669
7670// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7671// from a pair of inputs. For example:
7672// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7673// FP_ROUND(EXTRACT_ELT(Y, 0),
7674// FP_ROUND(EXTRACT_ELT(X, 1),
7675// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7676static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7677 const ARMSubtarget *ST) {
7678 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7679 if (!ST->hasMVEFloatOps())
7680 return SDValue();
7681
7682 SDLoc dl(BV);
7683 EVT VT = BV.getValueType();
7684 if (VT != MVT::v8f16)
7685 return SDValue();
7686
7687 // We are looking for a buildvector of fptrunc elements, where all the
7688 // elements are extracted from two sources in an interleaved fashion. Check the first two
7689 // items are valid enough and extract some info from them (they are checked
7690 // properly in the loop below).
7691 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7694 return SDValue();
7695 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7698 return SDValue();
7699 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7700 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7701 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7702 return SDValue();
7703
7704 // Check all the values in the BuildVector line up with our expectations.
7705 for (unsigned i = 1; i < 4; i++) {
7706 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7707 return Trunc.getOpcode() == ISD::FP_ROUND &&
7709 Trunc.getOperand(0).getOperand(0) == Op &&
7710 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7711 };
7712 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7713 return SDValue();
7714 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7715 return SDValue();
7716 }
7717
7718 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7719 DAG.getConstant(0, dl, MVT::i32));
7720 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7721 DAG.getConstant(1, dl, MVT::i32));
7722}
7723
7724// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7725// from a single input on alternating lanes. For example:
7726// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7727// FP_ROUND(EXTRACT_ELT(X, 2),
7728// FP_ROUND(EXTRACT_ELT(X, 4), ...)
7729static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7730 const ARMSubtarget *ST) {
7731 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7732 if (!ST->hasMVEFloatOps())
7733 return SDValue();
7734
7735 SDLoc dl(BV);
7736 EVT VT = BV.getValueType();
7737 if (VT != MVT::v4f32)
7738 return SDValue();
7739
7740 // We are looking for a buildvector of fpext elements, where all the
7741 // elements are alternating lanes from a single source. For example <0,2,4,6>
7742 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7743 // info from them (they are checked properly in the loop below).
7744 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7746 return SDValue();
7747 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7748 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7749 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7750 return SDValue();
7751
7752 // Check all the values in the BuildVector line up with our expectations.
7753 for (unsigned i = 1; i < 4; i++) {
7754 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7755 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7757 Trunc.getOperand(0).getOperand(0) == Op &&
7758 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7759 };
7760 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7761 return SDValue();
7762 }
7763
7764 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7765 DAG.getConstant(Offset, dl, MVT::i32));
7766}
7767
7768// If N is an integer constant that can be moved into a register in one
7769// instruction, return an SDValue of such a constant (will become a MOV
7770// instruction). Otherwise return null.
7771static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7772 const ARMSubtarget *ST, const SDLoc &dl) {
7773 uint64_t Val;
7774 if (!isa<ConstantSDNode>(N))
7775 return SDValue();
7776 Val = N->getAsZExtVal();
7777
7778 if (ST->isThumb1Only()) {
7779 if (Val <= 255 || ~Val <= 255)
7780 return DAG.getConstant(Val, dl, MVT::i32);
7781 } else {
7782 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7783 return DAG.getConstant(Val, dl, MVT::i32);
7784 }
7785 return SDValue();
7786}
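// A standalone restatement of the ARM-mode test above (illustrative only;
// IsARMSOImmSketch is a hypothetical helper, assuming <cstdint>): a constant
// can be materialised by a single MOV when it is an 8-bit value rotated
// right by an even amount, which is what ARM_AM::getSOImmVal recognises
// (the ~Val check covers the MVN form in the same way).
static inline bool IsARMSOImmSketch(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Undo a rotate-right by Rot; if the result fits in 8 bits, V is encodable.
    uint32_t Unrotated = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
    if ((Unrotated & ~0xffu) == 0)
      return true;
  }
  return false;
}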
7787
7788static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7789 const ARMSubtarget *ST) {
7790 SDLoc dl(Op);
7791 EVT VT = Op.getValueType();
7792
7793 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7794
7795 unsigned NumElts = VT.getVectorNumElements();
7796 unsigned BoolMask;
7797 unsigned BitsPerBool;
7798 if (NumElts == 2) {
7799 BitsPerBool = 8;
7800 BoolMask = 0xff;
7801 } else if (NumElts == 4) {
7802 BitsPerBool = 4;
7803 BoolMask = 0xf;
7804 } else if (NumElts == 8) {
7805 BitsPerBool = 2;
7806 BoolMask = 0x3;
7807 } else if (NumElts == 16) {
7808 BitsPerBool = 1;
7809 BoolMask = 0x1;
7810 } else
7811 return SDValue();
7812
7813 // If this is a single value copied into all lanes (a splat), we can just sign
7814 // extend that single value
7815 SDValue FirstOp = Op.getOperand(0);
7816 if (!isa<ConstantSDNode>(FirstOp) &&
7817 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7818 return U.get().isUndef() || U.get() == FirstOp;
7819 })) {
7820 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7821 DAG.getValueType(MVT::i1));
7822 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7823 }
7824
7825 // First create base with bits set where known
7826 unsigned Bits32 = 0;
7827 for (unsigned i = 0; i < NumElts; ++i) {
7828 SDValue V = Op.getOperand(i);
7829 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7830 continue;
7831 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7832 if (BitSet)
7833 Bits32 |= BoolMask << (i * BitsPerBool);
7834 }
7835
7836 // Add in unknown nodes
7837 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7838 DAG.getConstant(Bits32, dl, MVT::i32));
7839 for (unsigned i = 0; i < NumElts; ++i) {
7840 SDValue V = Op.getOperand(i);
7841 if (isa<ConstantSDNode>(V) || V.isUndef())
7842 continue;
7843 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7844 DAG.getConstant(i, dl, MVT::i32));
7845 }
7846
7847 return Base;
7848}
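// A plain model of the constant packing above (illustrative only;
// PackV4I1Sketch is a hypothetical helper): for v4i1 each lane owns a 4-bit
// group of the predicate image, so the vector <1,0,1,1> packs to 0xff0f.
static inline unsigned PackV4I1Sketch(const bool Lanes[4]) {
  const unsigned BitsPerBool = 4, BoolMask = 0xf;
  unsigned Bits32 = 0;
  for (unsigned i = 0; i < 4; ++i)
    if (Lanes[i])
      Bits32 |= BoolMask << (i * BitsPerBool);
  return Bits32;
}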
7849
7850static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7851 const ARMSubtarget *ST) {
7852 if (!ST->hasMVEIntegerOps())
7853 return SDValue();
7854
7855 // We are looking for a buildvector where each element is Op[0] + i*N
7856 EVT VT = Op.getValueType();
7857 SDValue Op0 = Op.getOperand(0);
7858 unsigned NumElts = VT.getVectorNumElements();
7859
7860 // Get the increment value from operand 1
7861 SDValue Op1 = Op.getOperand(1);
7862 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7863 !isa<ConstantSDNode>(Op1.getOperand(1)))
7864 return SDValue();
7865 unsigned N = Op1.getConstantOperandVal(1);
7866 if (N != 1 && N != 2 && N != 4 && N != 8)
7867 return SDValue();
7868
7869 // Check that each other operand matches
7870 for (unsigned I = 2; I < NumElts; I++) {
7871 SDValue OpI = Op.getOperand(I);
7872 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7873 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7874 OpI.getConstantOperandVal(1) != I * N)
7875 return SDValue();
7876 }
7877
7878 SDLoc DL(Op);
7879 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7880 DAG.getConstant(N, DL, MVT::i32));
7881}
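// A standalone checker for the pattern matched above (illustrative only;
// IsLinearRampSketch is hypothetical): VIDUP covers build_vectors of the
// form <x, x+N, x+2N, ...> with N in {1,2,4,8}, e.g. <p, p+4, p+8, p+12>.
static inline bool IsLinearRampSketch(const unsigned *Elts, unsigned NumElts,
                                      unsigned N) {
  if (N != 1 && N != 2 && N != 4 && N != 8)
    return false;
  for (unsigned I = 1; I < NumElts; ++I)
    if (Elts[I] != Elts[0] + I * N)
      return false;
  return true;
}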
7882
7883// Returns true if the operation N can be treated as a qr instruction variant at
7884// operand Op.
7885static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7886 switch (N->getOpcode()) {
7887 case ISD::ADD:
7888 case ISD::MUL:
7889 case ISD::SADDSAT:
7890 case ISD::UADDSAT:
7891 return true;
7892 case ISD::SUB:
7893 case ISD::SSUBSAT:
7894 case ISD::USUBSAT:
7895 return N->getOperand(1).getNode() == Op;
7896 case ISD::INTRINSIC_WO_CHAIN:
7897 switch (N->getConstantOperandVal(0)) {
7898 case Intrinsic::arm_mve_add_predicated:
7899 case Intrinsic::arm_mve_mul_predicated:
7900 case Intrinsic::arm_mve_qadd_predicated:
7901 case Intrinsic::arm_mve_vhadd:
7902 case Intrinsic::arm_mve_hadd_predicated:
7903 case Intrinsic::arm_mve_vqdmulh:
7904 case Intrinsic::arm_mve_qdmulh_predicated:
7905 case Intrinsic::arm_mve_vqrdmulh:
7906 case Intrinsic::arm_mve_qrdmulh_predicated:
7907 case Intrinsic::arm_mve_vqdmull:
7908 case Intrinsic::arm_mve_vqdmull_predicated:
7909 return true;
7910 case Intrinsic::arm_mve_sub_predicated:
7911 case Intrinsic::arm_mve_qsub_predicated:
7912 case Intrinsic::arm_mve_vhsub:
7913 case Intrinsic::arm_mve_hsub_predicated:
7914 return N->getOperand(2).getNode() == Op;
7915 default:
7916 return false;
7917 }
7918 default:
7919 return false;
7920 }
7921}
7922
7923// If this is a case we can't handle, return null and let the default
7924// expansion code take care of it.
7925SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7926 const ARMSubtarget *ST) const {
7927 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7928 SDLoc dl(Op);
7929 EVT VT = Op.getValueType();
7930
7931 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7932 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7933
7934 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7935 return R;
7936
7937 APInt SplatBits, SplatUndef;
7938 unsigned SplatBitSize;
7939 bool HasAnyUndefs;
7940 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7941 if (SplatUndef.isAllOnes())
7942 return DAG.getUNDEF(VT);
7943
7944 // If all the users of this constant splat are qr instruction variants,
7945 // generate a vdup of the constant.
7946 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7947 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7948 all_of(BVN->uses(),
7949 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7950 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7951 : SplatBitSize == 16 ? MVT::v8i16
7952 : MVT::v16i8;
7953 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7954 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7955 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7956 }
7957
7958 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7959 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7960 // Check if an immediate VMOV works.
7961 EVT VmovVT;
7962 SDValue Val =
7963 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7964 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7965
7966 if (Val.getNode()) {
7967 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7968 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7969 }
7970
7971 // Try an immediate VMVN.
7972 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7973 Val = isVMOVModifiedImm(
7974 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7975 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7976 if (Val.getNode()) {
7977 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7978 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7979 }
7980
7981 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7982 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7983 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7984 if (ImmVal != -1) {
7985 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7986 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7987 }
7988 }
7989
7990 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7991 // type.
7992 if (ST->hasMVEIntegerOps() &&
7993 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7994 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7995 : SplatBitSize == 16 ? MVT::v8i16
7996 : MVT::v16i8;
7997 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7998 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7999 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
8000 }
8001 }
8002 }
8003
8004 // Scan through the operands to see if only one value is used.
8005 //
8006 // As an optimisation, even if more than one value is used it may be more
8007 // profitable to splat with one value and then change some lanes.
8008 //
8009 // Heuristically we decide to do this if the vector has a "dominant" value,
8010 // defined as splatted to more than half of the lanes.
8011 unsigned NumElts = VT.getVectorNumElements();
8012 bool isOnlyLowElement = true;
8013 bool usesOnlyOneValue = true;
8014 bool hasDominantValue = false;
8015 bool isConstant = true;
8016
8017 // Map of the number of times a particular SDValue appears in the
8018 // element list.
8019 DenseMap<SDValue, unsigned> ValueCounts;
8020 SDValue Value;
8021 for (unsigned i = 0; i < NumElts; ++i) {
8022 SDValue V = Op.getOperand(i);
8023 if (V.isUndef())
8024 continue;
8025 if (i > 0)
8026 isOnlyLowElement = false;
8027 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
8028 isConstant = false;
8029
8030 ValueCounts.insert(std::make_pair(V, 0));
8031 unsigned &Count = ValueCounts[V];
8032
8033 // Is this value dominant? (takes up more than half of the lanes)
8034 if (++Count > (NumElts / 2)) {
8035 hasDominantValue = true;
8036 Value = V;
8037 }
8038 }
8039 if (ValueCounts.size() != 1)
8040 usesOnlyOneValue = false;
8041 if (!Value.getNode() && !ValueCounts.empty())
8042 Value = ValueCounts.begin()->first;
8043
8044 if (ValueCounts.empty())
8045 return DAG.getUNDEF(VT);
8046
8047 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8048 // Keep going if we are hitting this case.
8049 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8050 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8051
8052 unsigned EltSize = VT.getScalarSizeInBits();
8053
8054 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8055 // i32 and try again.
8056 if (hasDominantValue && EltSize <= 32) {
8057 if (!isConstant) {
8058 SDValue N;
8059
8060 // If we are VDUPing a value that comes directly from a vector, that will
8061 // cause an unnecessary move to and from a GPR, where instead we could
8062 // just use VDUPLANE. We can only do this if the lane being extracted
8063 // is at a constant index, as the VDUP from lane instructions only have
8064 // constant-index forms.
8065 ConstantSDNode *constIndex;
8066 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8067 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8068 // We need to create a new undef vector to use for the VDUPLANE if the
8069 // size of the vector from which we get the value is different than the
8070 // size of the vector that we need to create. We will insert the element
8071 // such that the register coalescer will remove unnecessary copies.
8072 if (VT != Value->getOperand(0).getValueType()) {
8073 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8074 VT.getVectorNumElements();
8075 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8076 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8077 Value, DAG.getConstant(index, dl, MVT::i32)),
8078 DAG.getConstant(index, dl, MVT::i32));
8079 } else
8080 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8081 Value->getOperand(0), Value->getOperand(1));
8082 } else
8083 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8084
8085 if (!usesOnlyOneValue) {
8086 // The dominant value was splatted as 'N', but we now have to insert
8087 // all differing elements.
8088 for (unsigned I = 0; I < NumElts; ++I) {
8089 if (Op.getOperand(I) == Value)
8090 continue;
8091 SmallVector<SDValue, 3> Ops;
8092 Ops.push_back(N);
8093 Ops.push_back(Op.getOperand(I));
8094 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8095 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8096 }
8097 }
8098 return N;
8099 }
8100 if (VT.getVectorElementType().isFloatingPoint()) {
8101 SmallVector<SDValue, 8> Ops;
8102 MVT FVT = VT.getVectorElementType().getSimpleVT();
8103 assert(FVT == MVT::f32 || FVT == MVT::f16);
8104 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8105 for (unsigned i = 0; i < NumElts; ++i)
8106 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8107 Op.getOperand(i)));
8108 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8109 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8110 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8111 if (Val.getNode())
8112 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8113 }
8114 if (usesOnlyOneValue) {
8115 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8116 if (isConstant && Val.getNode())
8117 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8118 }
8119 }
8120
8121 // If all elements are constants and the case above didn't get hit, fall back
8122 // to the default expansion, which will generate a load from the constant
8123 // pool.
8124 if (isConstant)
8125 return SDValue();
8126
8127 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8128 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8129 // length <= 2.
8130 if (NumElts >= 4)
8131 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8132 return shuffle;
8133
8134 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8135 // VCVT's
8136 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8137 return VCVT;
8138 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8139 return VCVT;
8140
8141 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8142 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8143 // into two 64-bit vectors; we might discover a better way to lower it.
8144 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8145 EVT ExtVT = VT.getVectorElementType();
8146 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8147 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8148 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8149 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8150 SDValue Upper =
8151 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8152 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8153 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8154 if (Lower && Upper)
8155 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8156 }
8157
8158 // Vectors with 32- or 64-bit elements can be built by directly assigning
8159 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8160 // will be legalized.
8161 if (EltSize >= 32) {
8162 // Do the expansion with floating-point types, since that is what the VFP
8163 // registers are defined to use, and since i64 is not legal.
8164 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8165 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8166 SmallVector<SDValue, 8> Ops;
8167 for (unsigned i = 0; i < NumElts; ++i)
8168 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8169 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8170 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8171 }
8172
8173 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8174 // know the default expansion would otherwise fall back on something even
8175 // worse. For a vector with one or two non-undef values, that's
8176 // scalar_to_vector for the elements followed by a shuffle (provided the
8177 // shuffle is valid for the target) and materialization element by element
8178 // on the stack followed by a load for everything else.
8179 if (!isConstant && !usesOnlyOneValue) {
8180 SDValue Vec = DAG.getUNDEF(VT);
8181 for (unsigned i = 0 ; i < NumElts; ++i) {
8182 SDValue V = Op.getOperand(i);
8183 if (V.isUndef())
8184 continue;
8185 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8186 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8187 }
8188 return Vec;
8189 }
8190
8191 return SDValue();
8192}
8193
8194// Gather data to see if the operation can be modelled as a
8195// shuffle in combination with VEXTs.
8196SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8197 SelectionDAG &DAG) const {
8198 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8199 SDLoc dl(Op);
8200 EVT VT = Op.getValueType();
8201 unsigned NumElts = VT.getVectorNumElements();
8202
8203 struct ShuffleSourceInfo {
8204 SDValue Vec;
8205 unsigned MinElt = std::numeric_limits<unsigned>::max();
8206 unsigned MaxElt = 0;
8207
8208 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8209 // be compatible with the shuffle we intend to construct. As a result
8210 // ShuffleVec will be some sliding window into the original Vec.
8211 SDValue ShuffleVec;
8212
8213 // Code should guarantee that element i in Vec starts at element
8214 // "WindowBase + i * WindowScale" in ShuffleVec.
8215 int WindowBase = 0;
8216 int WindowScale = 1;
8217
8218 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8219
8220 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8221 };
8222
8223 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8224 // node.
8225 SmallVector<ShuffleSourceInfo, 2> Sources;
8226 for (unsigned i = 0; i < NumElts; ++i) {
8227 SDValue V = Op.getOperand(i);
8228 if (V.isUndef())
8229 continue;
8230 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8231 // A shuffle can only come from building a vector from various
8232 // elements of other vectors.
8233 return SDValue();
8234 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8235 // Furthermore, shuffles require a constant mask, whereas extractelts
8236 // accept variable indices.
8237 return SDValue();
8238 }
8239
8240 // Add this element source to the list if it's not already there.
8241 SDValue SourceVec = V.getOperand(0);
8242 auto Source = llvm::find(Sources, SourceVec);
8243 if (Source == Sources.end())
8244 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8245
8246 // Update the minimum and maximum lane number seen.
8247 unsigned EltNo = V.getConstantOperandVal(1);
8248 Source->MinElt = std::min(Source->MinElt, EltNo);
8249 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8250 }
8251
8252 // Currently only do something sane when at most two source vectors
8253 // are involved.
8254 if (Sources.size() > 2)
8255 return SDValue();
8256
8257 // Find out the smallest element size among result and two sources, and use
8258 // it as element size to build the shuffle_vector.
8259 EVT SmallestEltTy = VT.getVectorElementType();
8260 for (auto &Source : Sources) {
8261 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8262 if (SrcEltTy.bitsLT(SmallestEltTy))
8263 SmallestEltTy = SrcEltTy;
8264 }
8265 unsigned ResMultiplier =
8266 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8267 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8268 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8269
8270 // If the source vector is too wide or too narrow, we may nevertheless be able
8271 // to construct a compatible shuffle either by concatenating it with UNDEF or
8272 // extracting a suitable range of elements.
8273 for (auto &Src : Sources) {
8274 EVT SrcVT = Src.ShuffleVec.getValueType();
8275
8276 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8277 uint64_t VTSize = VT.getFixedSizeInBits();
8278 if (SrcVTSize == VTSize)
8279 continue;
8280
8281 // This stage of the search produces a source with the same element type as
8282 // the original, but with a total width matching the BUILD_VECTOR output.
8283 EVT EltVT = SrcVT.getVectorElementType();
8284 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8285 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8286
8287 if (SrcVTSize < VTSize) {
8288 if (2 * SrcVTSize != VTSize)
8289 return SDValue();
8290 // We can pad out the smaller vector for free, so if it's part of a
8291 // shuffle...
8292 Src.ShuffleVec =
8293 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8294 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8295 continue;
8296 }
8297
8298 if (SrcVTSize != 2 * VTSize)
8299 return SDValue();
8300
8301 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8302 // Span too large for a VEXT to cope
8303 return SDValue();
8304 }
8305
8306 if (Src.MinElt >= NumSrcElts) {
8307 // The extraction can just take the second half
8308 Src.ShuffleVec =
8309 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8310 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8311 Src.WindowBase = -NumSrcElts;
8312 } else if (Src.MaxElt < NumSrcElts) {
8313 // The extraction can just take the first half
8314 Src.ShuffleVec =
8315 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8316 DAG.getConstant(0, dl, MVT::i32));
8317 } else {
8318 // An actual VEXT is needed
8319 SDValue VEXTSrc1 =
8320 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8321 DAG.getConstant(0, dl, MVT::i32));
8322 SDValue VEXTSrc2 =
8323 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8324 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8325
8326 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8327 VEXTSrc2,
8328 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8329 Src.WindowBase = -Src.MinElt;
8330 }
8331 }
8332
8333 // Another possible incompatibility occurs from the vector element types. We
8334 // can fix this by bitcasting the source vectors to the same type we intend
8335 // for the shuffle.
8336 for (auto &Src : Sources) {
8337 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8338 if (SrcEltTy == SmallestEltTy)
8339 continue;
8340 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8341 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8342 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8343 Src.WindowBase *= Src.WindowScale;
8344 }
8345
8346 // Final check before we try to actually produce a shuffle.
8347 LLVM_DEBUG(for (auto Src
8348 : Sources)
8349 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8350
8351 // The stars all align, our next step is to produce the mask for the shuffle.
8352 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8353 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8354 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8355 SDValue Entry = Op.getOperand(i);
8356 if (Entry.isUndef())
8357 continue;
8358
8359 auto Src = llvm::find(Sources, Entry.getOperand(0));
8360 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8361
8362 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8363 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8364 // segment.
8365 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8366 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8367 VT.getScalarSizeInBits());
8368 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8369
8370 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8371 // starting at the appropriate offset.
8372 int *LaneMask = &Mask[i * ResMultiplier];
8373
8374 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8375 ExtractBase += NumElts * (Src - Sources.begin());
8376 for (int j = 0; j < LanesDefined; ++j)
8377 LaneMask[j] = ExtractBase + j;
8378 }
8379
8380
8381 // We can't handle more than two sources. This should have already
8382 // been checked before this point.
8383 assert(Sources.size() <= 2 && "Too many sources!");
8384
8385 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8386 for (unsigned i = 0; i < Sources.size(); ++i)
8387 ShuffleOps[i] = Sources[i].ShuffleVec;
8388
8389 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8390 ShuffleOps[1], Mask, DAG);
8391 if (!Shuffle)
8392 return SDValue();
8393 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8394}
8395
8396enum ShuffleOpCodes {
8397 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8398 OP_VREV,
8399 OP_VDUP0,
8400 OP_VDUP1,
8401 OP_VDUP2,
8402 OP_VDUP3,
8403 OP_VEXT1,
8404 OP_VEXT2,
8405 OP_VEXT3,
8406 OP_VUZPL, // VUZP, left result
8407 OP_VUZPR, // VUZP, right result
8408 OP_VZIPL, // VZIP, left result
8409 OP_VZIPR, // VZIP, right result
8410 OP_VTRNL, // VTRN, left result
8411 OP_VTRNR // VTRN, right result
8412};
8413
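// Each PerfectShuffleTable entry packs a 2-bit cost, a 4-bit opcode and two
// 13-bit operand ids: Cost = PFEntry >> 30, OpNum = (PFEntry >> 26) & 0xF,
// LHSID = (PFEntry >> 13) & 0x1FFF, RHSID = PFEntry & 0x1FFF (decoded below
// and in GeneratePerfectShuffle).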
8414static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8415 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8416 switch (OpNum) {
8417 case OP_COPY:
8418 case OP_VREV:
8419 case OP_VDUP0:
8420 case OP_VDUP1:
8421 case OP_VDUP2:
8422 case OP_VDUP3:
8423 return true;
8424 }
8425 return false;
8426}
8427
8428/// isShuffleMaskLegal - Targets can use this to indicate that they only
8429/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8430/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8431/// are assumed to be legal.
8432bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8433 if (VT.getVectorNumElements() == 4 &&
8434 (VT.is128BitVector() || VT.is64BitVector())) {
8435 unsigned PFIndexes[4];
8436 for (unsigned i = 0; i != 4; ++i) {
8437 if (M[i] < 0)
8438 PFIndexes[i] = 8;
8439 else
8440 PFIndexes[i] = M[i];
8441 }
8442
8443 // Compute the index in the perfect shuffle table.
8444 unsigned PFTableIndex =
8445 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
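// For example, the identity mask <0,1,2,3> maps to index
// 0*729 + 1*81 + 2*9 + 3 == 102; undef lanes use digit 8.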
8446 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8447 unsigned Cost = (PFEntry >> 30);
8448
8449 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8450 return true;
8451 }
8452
8453 bool ReverseVEXT, isV_UNDEF;
8454 unsigned Imm, WhichResult;
8455
8456 unsigned EltSize = VT.getScalarSizeInBits();
8457 if (EltSize >= 32 ||
8459 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8460 isVREVMask(M, VT, 64) ||
8461 isVREVMask(M, VT, 32) ||
8462 isVREVMask(M, VT, 16))
8463 return true;
8464 else if (Subtarget->hasNEON() &&
8465 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8466 isVTBLMask(M, VT) ||
8467 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8468 return true;
8469 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8470 isReverseMask(M, VT))
8471 return true;
8472 else if (Subtarget->hasMVEIntegerOps() &&
8473 (isVMOVNMask(M, VT, true, false) ||
8474 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8475 return true;
8476 else if (Subtarget->hasMVEIntegerOps() &&
8477 (isTruncMask(M, VT, false, false) ||
8478 isTruncMask(M, VT, false, true) ||
8479 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8480 return true;
8481 else
8482 return false;
8483}
8484
8485/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8486/// the specified operations to build the shuffle.
8487static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8488 SDValue RHS, SelectionDAG &DAG,
8489 const SDLoc &dl) {
8490 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8491 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8492 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8493
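// For OP_COPY, LHSID is itself a base-9 mask index: (1*9+2)*9+3 encodes
// <0,1,2,3> (the LHS unchanged) and ((4*9+5)*9+6)*9+7 encodes <4,5,6,7>
// (the RHS).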
8494 if (OpNum == OP_COPY) {
8495 if (LHSID == (1*9+2)*9+3) return LHS;
8496 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8497 return RHS;
8498 }
8499
8500 SDValue OpLHS, OpRHS;
8501 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8502 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8503 EVT VT = OpLHS.getValueType();
8504
8505 switch (OpNum) {
8506 default: llvm_unreachable("Unknown shuffle opcode!");
8507 case OP_VREV:
8508 // VREV divides the vector in half and swaps within the half.
8509 if (VT.getScalarSizeInBits() == 32)
8510 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8511 // vrev <4 x i16> -> VREV32
8512 if (VT.getScalarSizeInBits() == 16)
8513 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8514 // vrev <4 x i8> -> VREV16
8515 assert(VT.getScalarSizeInBits() == 8);
8516 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8517 case OP_VDUP0:
8518 case OP_VDUP1:
8519 case OP_VDUP2:
8520 case OP_VDUP3:
8521 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8522 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8523 case OP_VEXT1:
8524 case OP_VEXT2:
8525 case OP_VEXT3:
8526 return DAG.getNode(ARMISD::VEXT, dl, VT,
8527 OpLHS, OpRHS,
8528 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8529 case OP_VUZPL:
8530 case OP_VUZPR:
8531 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8532 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8533 case OP_VZIPL:
8534 case OP_VZIPR:
8535 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8536 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8537 case OP_VTRNL:
8538 case OP_VTRNR:
8539 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8540 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8541 }
8542}
8543
8544static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8545 ArrayRef<int> ShuffleMask,
8546 SelectionDAG &DAG) {
8547 // Check to see if we can use the VTBL instruction.
8548 SDValue V1 = Op.getOperand(0);
8549 SDValue V2 = Op.getOperand(1);
8550 SDLoc DL(Op);
8551
8552 SmallVector<SDValue, 8> VTBLMask;
8553 for (int I : ShuffleMask)
8554 VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
8555
8556 if (V2.getNode()->isUndef())
8557 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8558 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8559
8560 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8561 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8562}
8563
8564static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8565 SDLoc DL(Op);
8566 EVT VT = Op.getValueType();
8567
8568 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8569 "Expect an v8i16/v16i8 type");
8570 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8571 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8572 // extract the first 8 bytes into the top double word and the last 8 bytes
8573 // into the bottom double word, through a new vector shuffle that will be
8574 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8575 std::vector<int> NewMask;
8576 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8577 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8578 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8579 NewMask.push_back(i);
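// For example, for v16i8 NewMask is <8..15, 0..7>, swapping the two double
// words of the reversed vector.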
8580 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8581}
8582
8583static EVT getVectorTyFromPredicateVector(EVT VT) {
8584 switch (VT.getSimpleVT().SimpleTy) {
8585 case MVT::v2i1:
8586 return MVT::v2f64;
8587 case MVT::v4i1:
8588 return MVT::v4i32;
8589 case MVT::v8i1:
8590 return MVT::v8i16;
8591 case MVT::v16i1:
8592 return MVT::v16i8;
8593 default:
8594 llvm_unreachable("Unexpected vector predicate type");
8595 }
8596}
8597
8598static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8599 SelectionDAG &DAG) {
8600 // Converting from boolean predicates to integers involves creating a vector
8601 // of all ones or all zeroes and selecting the lanes based upon the real
8602 // predicate.
8603 SDValue AllOnes =
8604 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8605 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8606
8607 SDValue AllZeroes =
8608 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8609 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
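// OpCmode 0xe selects the byte-splat VMOV-immediate encoding, so AllOnes and
// AllZeroes are v16i8 splats of 0xff and 0x00 respectively.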
8610
8611 // Get full vector type from predicate type
8612 EVT NewVT = getVectorTyFromPredicateVector(VT);
8613
8614 SDValue RecastV1;
8615 // If the real predicate is a v8i1 or v4i1 (not v16i1) then we need to recast
8616 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8617 // sizes are not the same. We have to use an MVE-specific PREDICATE_CAST node,
8618 // since we know in hardware the sizes are really the same.
8619 if (VT != MVT::v16i1)
8620 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8621 else
8622 RecastV1 = Pred;
8623
8624 // Select either all ones or zeroes depending upon the real predicate bits.
8625 SDValue PredAsVector =
8626 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8627
8628 // Recast our new predicate-as-integer v16i8 vector into something
8629 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8630 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8631}
8632
8633static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8634 const ARMSubtarget *ST) {
8635 EVT VT = Op.getValueType();
8636 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8637 ArrayRef<int> ShuffleMask = SVN->getMask();
8638
8639 assert(ST->hasMVEIntegerOps() &&
8640 "No support for vector shuffle of boolean predicates");
8641
8642 SDValue V1 = Op.getOperand(0);
8643 SDValue V2 = Op.getOperand(1);
8644 SDLoc dl(Op);
8645 if (isReverseMask(ShuffleMask, VT)) {
8646 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8647 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8648 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8649 DAG.getConstant(16, dl, MVT::i32));
8650 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8651 }
8652
8653 // Until we can come up with optimised cases for every single vector
8654 // shuffle in existence we have chosen the least painful strategy. This is
8655 // to essentially promote the boolean predicate to an 8-bit integer, where
8656 // each predicate represents a byte. Then we fall back on a normal integer
8657 // vector shuffle and convert the result back into a predicate vector. In
8658 // many cases the generated code might be even better than scalar code
8659 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8660 // fields in a register into 8 other arbitrary 2-bit fields!
8661 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8662 EVT NewVT = PredAsVector1.getValueType();
8663 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8664 : PromoteMVEPredVector(dl, V2, VT, DAG);
8665 assert(PredAsVector2.getValueType() == NewVT &&
8666 "Expected identical vector type in expanded i1 shuffle!");
8667
8668 // Do the shuffle!
8669 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8670 PredAsVector2, ShuffleMask);
8671
8672 // Now return the result of comparing the shuffled vector with zero,
8673 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8674 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8675 if (VT == MVT::v2i1) {
8676 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8677 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8678 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8679 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8680 }
8681 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8682 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8683}
8684
8685static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8686 ArrayRef<int> ShuffleMask,
8687 SelectionDAG &DAG) {
8688 // Attempt to lower the vector shuffle using as many whole register movs as
8689 // possible. This is useful for types smaller than 32 bits, which would
8690 // often otherwise become a series of GPR movs.
8691 SDLoc dl(Op);
8692 EVT VT = Op.getValueType();
8693 if (VT.getScalarSizeInBits() >= 32)
8694 return SDValue();
8695
8696 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8697 "Unexpected vector type");
8698 int NumElts = VT.getVectorNumElements();
8699 int QuarterSize = NumElts / 4;
8700 // The four final parts of the vector, as i32's
8701 SDValue Parts[4];
8702
8703 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8704 // <u,u,u,u>), returning the vmov lane index
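// For example, with a v16i8 shuffle each part covers four bytes, so mask
// values <4,5,6,7> within a part select 32-bit lane 1 of the first input.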
8705 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8706 // Detect which mov lane this would be from the first non-undef element.
8707 int MovIdx = -1;
8708 for (int i = 0; i < Length; i++) {
8709 if (ShuffleMask[Start + i] >= 0) {
8710 if (ShuffleMask[Start + i] % Length != i)
8711 return -1;
8712 MovIdx = ShuffleMask[Start + i] / Length;
8713 break;
8714 }
8715 }
8716 // If all items are undef, leave this for other combines
8717 if (MovIdx == -1)
8718 return -1;
8719 // Check the remaining values are the correct part of the same mov
8720 for (int i = 1; i < Length; i++) {
8721 if (ShuffleMask[Start + i] >= 0 &&
8722 (ShuffleMask[Start + i] / Length != MovIdx ||
8723 ShuffleMask[Start + i] % Length != i))
8724 return -1;
8725 }
8726 return MovIdx;
8727 };
8728
8729 for (int Part = 0; Part < 4; ++Part) {
8730 // Does this part look like a mov
8731 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8732 if (Elt != -1) {
8733 SDValue Input = Op->getOperand(0);
8734 if (Elt >= 4) {
8735 Input = Op->getOperand(1);
8736 Elt -= 4;
8737 }
8738 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8739 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8740 DAG.getConstant(Elt, dl, MVT::i32));
8741 }
8742 }
8743
8744 // Nothing interesting found, just return
8745 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8746 return SDValue();
8747
8748 // The other parts need to be built with the old shuffle vector, cast to a
8749 // v4i32 and extract_vector_elts
8750 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8751 SmallVector<int, 16> NewShuffleMask;
8752 for (int Part = 0; Part < 4; ++Part)
8753 for (int i = 0; i < QuarterSize; i++)
8754 NewShuffleMask.push_back(
8755 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8756 SDValue NewShuffle = DAG.getVectorShuffle(
8757 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8758 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8759
8760 for (int Part = 0; Part < 4; ++Part)
8761 if (!Parts[Part])
8762 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8763 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8764 }
8765 // Build a vector out of the various parts and bitcast it back to the original
8766 // type.
8767 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8768 return DAG.getBitcast(VT, NewVec);
8769}
8770
8771static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8772 ArrayRef<int> ShuffleMask,
8773 SelectionDAG &DAG) {
8774 SDValue V1 = Op.getOperand(0);
8775 SDValue V2 = Op.getOperand(1);
8776 EVT VT = Op.getValueType();
8777 unsigned NumElts = VT.getVectorNumElements();
8778
8779 // A one-off identity mask is one that is mostly an identity mask from a
8780 // single source but contains a single element out-of-place, either from a
8781 // different vector or from another position in the same vector. As opposed to
8782 // lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8783 // pair directly.
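// For example, the mask <0,1,7,3> is an identity of the first source except
// for element 2, which can be filled with a single extract/insert pair.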
8784 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8785 int &OffElement) {
8786 OffElement = -1;
8787 int NonUndef = 0;
8788 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8789 if (Mask[i] == -1)
8790 continue;
8791 NonUndef++;
8792 if (Mask[i] != i + BaseOffset) {
8793 if (OffElement == -1)
8794 OffElement = i;
8795 else
8796 return false;
8797 }
8798 }
8799 return NonUndef > 2 && OffElement != -1;
8800 };
8801 int OffElement;
8802 SDValue VInput;
8803 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8804 VInput = V1;
8805 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8806 VInput = V2;
8807 else
8808 return SDValue();
8809
8810 SDLoc dl(Op);
8811 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8812 ? MVT::i32
8813 : VT.getScalarType();
8814 SDValue Elt = DAG.getNode(
8815 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8816 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8817 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8818 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8819 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8820}
8821
8822static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8823 const ARMSubtarget *ST) {
8824 SDValue V1 = Op.getOperand(0);
8825 SDValue V2 = Op.getOperand(1);
8826 SDLoc dl(Op);
8827 EVT VT = Op.getValueType();
8828 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8829 unsigned EltSize = VT.getScalarSizeInBits();
8830
8831 if (ST->hasMVEIntegerOps() && EltSize == 1)
8832 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8833
8834 // Convert shuffles that are directly supported on NEON to target-specific
8835 // DAG nodes, instead of keeping them as shuffles and matching them again
8836 // during code selection. This is more efficient and avoids the possibility
8837 // of inconsistencies between legalization and selection.
8838 // FIXME: floating-point vectors should be canonicalized to integer vectors
8839 // of the same size so that they get CSEd properly.
8840 ArrayRef<int> ShuffleMask = SVN->getMask();
8841
8842 if (EltSize <= 32) {
8843 if (SVN->isSplat()) {
8844 int Lane = SVN->getSplatIndex();
8845 // If this is undef splat, generate it via "just" vdup, if possible.
8846 if (Lane == -1) Lane = 0;
8847
8848 // Test if V1 is a SCALAR_TO_VECTOR.
8849 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8850 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8851 }
8852 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8853 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8854 // reaches it).
8855 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8856 !isa<ConstantSDNode>(V1.getOperand(0))) {
8857 bool IsScalarToVector = true;
8858 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8859 if (!V1.getOperand(i).isUndef()) {
8860 IsScalarToVector = false;
8861 break;
8862 }
8863 if (IsScalarToVector)
8864 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8865 }
8866 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8867 DAG.getConstant(Lane, dl, MVT::i32));
8868 }
8869
8870 bool ReverseVEXT = false;
8871 unsigned Imm = 0;
8872 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8873 if (ReverseVEXT)
8874 std::swap(V1, V2);
8875 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8876 DAG.getConstant(Imm, dl, MVT::i32));
8877 }
8878
8879 if (isVREVMask(ShuffleMask, VT, 64))
8880 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8881 if (isVREVMask(ShuffleMask, VT, 32))
8882 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8883 if (isVREVMask(ShuffleMask, VT, 16))
8884 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8885
8886 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8887 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8888 DAG.getConstant(Imm, dl, MVT::i32));
8889 }
8890
8891 // Check for Neon shuffles that modify both input vectors in place.
8892 // If both results are used, i.e., if there are two shuffles with the same
8893 // source operands and with masks corresponding to both results of one of
8894 // these operations, DAG memoization will ensure that a single node is
8895 // used for both shuffles.
8896 unsigned WhichResult = 0;
8897 bool isV_UNDEF = false;
8898 if (ST->hasNEON()) {
8899 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8900 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8901 if (isV_UNDEF)
8902 V2 = V1;
8903 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8904 .getValue(WhichResult);
8905 }
8906 }
8907 if (ST->hasMVEIntegerOps()) {
8908 if (isVMOVNMask(ShuffleMask, VT, false, false))
8909 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8910 DAG.getConstant(0, dl, MVT::i32));
8911 if (isVMOVNMask(ShuffleMask, VT, true, false))
8912 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8913 DAG.getConstant(1, dl, MVT::i32));
8914 if (isVMOVNMask(ShuffleMask, VT, true, true))
8915 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8916 DAG.getConstant(1, dl, MVT::i32));
8917 }
8918
8919 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8920 // shuffles that produce a result larger than their operands with:
8921 // shuffle(concat(v1, undef), concat(v2, undef))
8922 // ->
8923 // shuffle(concat(v1, v2), undef)
8924 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8925 //
8926 // This is useful in the general case, but there are special cases where
8927 // native shuffles produce larger results: the two-result ops.
8928 //
8929 // Look through the concat when lowering them:
8930 // shuffle(concat(v1, v2), undef)
8931 // ->
8932 // concat(VZIP(v1, v2):0, :1)
8933 //
8934 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8935 SDValue SubV1 = V1->getOperand(0);
8936 SDValue SubV2 = V1->getOperand(1);
8937 EVT SubVT = SubV1.getValueType();
8938
8939 // We expect these to have been canonicalized to -1.
8940 assert(llvm::all_of(ShuffleMask, [&](int i) {
8941 return i < (int)VT.getVectorNumElements();
8942 }) && "Unexpected shuffle index into UNDEF operand!");
8943
8944 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8945 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8946 if (isV_UNDEF)
8947 SubV2 = SubV1;
8948 assert((WhichResult == 0) &&
8949 "In-place shuffle of concat can only have one result!");
8950 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8951 SubV1, SubV2);
8952 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8953 Res.getValue(1));
8954 }
8955 }
8956 }
8957
8958 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8959 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8960 return V;
8961
8962 for (bool Top : {false, true}) {
8963 for (bool SingleSource : {false, true}) {
8964 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8965 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8966 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8967 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8968 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8969 SingleSource ? V1 : V2);
8970 if (Top) {
8971 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8972 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8973 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8974 }
8975 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8976 }
8977 }
8978 }
8979 }
8980
8981 // If the shuffle is not directly supported and it has 4 elements, use
8982 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8983 unsigned NumElts = VT.getVectorNumElements();
8984 if (NumElts == 4) {
8985 unsigned PFIndexes[4];
8986 for (unsigned i = 0; i != 4; ++i) {
8987 if (ShuffleMask[i] < 0)
8988 PFIndexes[i] = 8;
8989 else
8990 PFIndexes[i] = ShuffleMask[i];
8991 }
8992
8993 // Compute the index in the perfect shuffle table.
8994 unsigned PFTableIndex =
8995 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8996 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8997 unsigned Cost = (PFEntry >> 30);
8998
8999 if (Cost <= 4) {
9000 if (ST->hasNEON())
9001 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9002 else if (isLegalMVEShuffleOp(PFEntry)) {
9003 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9004 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9005 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
9006 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
9007 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
9008 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9009 }
9010 }
9011 }
9012
9013 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
9014 if (EltSize >= 32) {
9015 // Do the expansion with floating-point types, since that is what the VFP
9016 // registers are defined to use, and since i64 is not legal.
9017 EVT EltVT = EVT::getFloatingPointVT(EltSize);
9018 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
9019 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
9020 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
9021 SmallVector<SDValue, 8> Ops;
9022 for (unsigned i = 0; i < NumElts; ++i) {
9023 if (ShuffleMask[i] < 0)
9024 Ops.push_back(DAG.getUNDEF(EltVT));
9025 else
9026 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
9027 ShuffleMask[i] < (int)NumElts ? V1 : V2,
9028 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
9029 dl, MVT::i32)));
9030 }
9031 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
9032 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
9033 }
9034
9035 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9036 isReverseMask(ShuffleMask, VT))
9037 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9038
9039 if (ST->hasNEON() && VT == MVT::v8i8)
9040 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9041 return NewOp;
9042
9043 if (ST->hasMVEIntegerOps())
9044 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9045 return NewOp;
9046
9047 return SDValue();
9048}
9049
9050static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9051 const ARMSubtarget *ST) {
9052 EVT VecVT = Op.getOperand(0).getValueType();
9053 SDLoc dl(Op);
9054
9055 assert(ST->hasMVEIntegerOps() &&
9056 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9057
9058 SDValue Conv =
9059 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9060 unsigned Lane = Op.getConstantOperandVal(2);
9061 unsigned LaneWidth =
9062 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9063 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
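// For example, a v4i1 predicate has LaneWidth 4, so inserting into lane 2
// targets bits [11:8] of the 16-bit predicate (Mask == 0xf00).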
9064 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9065 Op.getOperand(1), DAG.getValueType(MVT::i1));
9066 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9067 DAG.getConstant(~Mask, dl, MVT::i32));
9068 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9069}
9070
9071SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9072 SelectionDAG &DAG) const {
9073 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9074 SDValue Lane = Op.getOperand(2);
9075 if (!isa<ConstantSDNode>(Lane))
9076 return SDValue();
9077
9078 SDValue Elt = Op.getOperand(1);
9079 EVT EltVT = Elt.getValueType();
9080
9081 if (Subtarget->hasMVEIntegerOps() &&
9082 Op.getValueType().getScalarSizeInBits() == 1)
9083 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9084
9085 if (getTypeAction(*DAG.getContext(), EltVT) ==
9087 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9088 // but the type system will try to do that if we don't intervene.
9089 // Reinterpret any such vector-element insertion as one with the
9090 // corresponding integer types.
9091
9092 SDLoc dl(Op);
9093
9094 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9095 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9096 TargetLowering::TypePromoteFloat);
9097
9098 SDValue VecIn = Op.getOperand(0);
9099 EVT VecVT = VecIn.getValueType();
9100 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9101 VecVT.getVectorNumElements());
9102
9103 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9104 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9105 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9106 IVecIn, IElt, Lane);
9107 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9108 }
9109
9110 return Op;
9111}
9112
9113static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9114 const ARMSubtarget *ST) {
9115 EVT VecVT = Op.getOperand(0).getValueType();
9116 SDLoc dl(Op);
9117
9118 assert(ST->hasMVEIntegerOps() &&
9119 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9120
9121 SDValue Conv =
9122 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9123 unsigned Lane = Op.getConstantOperandVal(1);
9124 unsigned LaneWidth =
9125 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9126 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9127 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
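// For example, a v8i1 predicate has LaneWidth 2, so lane 3 is read starting
// at bit 6 of the 16-bit predicate value.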
9128 return Shift;
9129}
9130
9131static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9132 const ARMSubtarget *ST) {
9133 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9134 SDValue Lane = Op.getOperand(1);
9135 if (!isa<ConstantSDNode>(Lane))
9136 return SDValue();
9137
9138 SDValue Vec = Op.getOperand(0);
9139 EVT VT = Vec.getValueType();
9140
9141 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9142 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9143
9144 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9145 SDLoc dl(Op);
9146 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9147 }
9148
9149 return Op;
9150}
9151
9152static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9153 const ARMSubtarget *ST) {
9154 SDLoc dl(Op);
9155 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9156 "Unexpected custom CONCAT_VECTORS lowering");
9158 "Unexpected custom CONCAT_VECTORS lowering");
9159 assert(ST->hasMVEIntegerOps() &&
9160 "CONCAT_VECTORS lowering only supported for MVE");
9161
9162 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9163 EVT Op1VT = V1.getValueType();
9164 EVT Op2VT = V2.getValueType();
9165 assert(Op1VT == Op2VT && "Operand types don't match!");
9166 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9167 "Unexpected i1 concat operations!");
9168 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9169
9170 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9171 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9172
9173 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9174 // promoted to v8i16, etc.
9175 MVT ElType =
9176 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9177 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9178
9179 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9180 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9181 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9182 // ConcatVT.
9183 SDValue ConVec =
9184 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9185 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9186 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9187 }
9188
9189 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9190 // to be the right size for the destination. For example, if Op1 is v4i1
9191 // then the promoted vector is v4i32. The result of concatenation gives a
9192 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9193 // needs truncating to i16 and inserting in the result.
9194 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9195 EVT NewVT = NewV.getValueType();
9196 EVT ConcatVT = ConVec.getValueType();
9197 unsigned ExtScale = 1;
9198 if (NewVT == MVT::v2f64) {
9199 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9200 ExtScale = 2;
9201 }
9202 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9203 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9204 DAG.getIntPtrConstant(i * ExtScale, dl));
9205 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9206 DAG.getConstant(j, dl, MVT::i32));
9207 }
9208 return ConVec;
9209 };
9210 unsigned j = 0;
9211 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9212 ConVec = ExtractInto(NewV1, ConVec, j);
9213 ConVec = ExtractInto(NewV2, ConVec, j);
9214
9215 // Now return the result of comparing the subvector with zero, which will
9216 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9217 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9218 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9219 };
9220
9221 // Concat each pair of subvectors and pack into the lower half of the array.
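// For example, four v4i1 operands are first combined pairwise into two v8i1
// values, and a second round combines those into the final v16i1.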
9222 SmallVector<SDValue> ConcatOps(Op->ops());
9223 while (ConcatOps.size() > 1) {
9224 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9225 SDValue V1 = ConcatOps[I];
9226 SDValue V2 = ConcatOps[I + 1];
9227 ConcatOps[I / 2] = ConcatPair(V1, V2);
9228 }
9229 ConcatOps.resize(ConcatOps.size() / 2);
9230 }
9231 return ConcatOps[0];
9232}
9233
9234static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9235 const ARMSubtarget *ST) {
9236 EVT VT = Op->getValueType(0);
9237 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9238 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9239
9240 // The only time a CONCAT_VECTORS operation can have legal types is when
9241 // two 64-bit vectors are concatenated to a 128-bit vector.
9242 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9243 "unexpected CONCAT_VECTORS");
9244 SDLoc dl(Op);
9245 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9246 SDValue Op0 = Op.getOperand(0);
9247 SDValue Op1 = Op.getOperand(1);
9248 if (!Op0.isUndef())
9249 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9250 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9251 DAG.getIntPtrConstant(0, dl));
9252 if (!Op1.isUndef())
9253 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9254 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9255 DAG.getIntPtrConstant(1, dl));
9256 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9257}
9258
9259static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9260 const ARMSubtarget *ST) {
9261 SDValue V1 = Op.getOperand(0);
9262 SDValue V2 = Op.getOperand(1);
9263 SDLoc dl(Op);
9264 EVT VT = Op.getValueType();
9265 EVT Op1VT = V1.getValueType();
9266 unsigned NumElts = VT.getVectorNumElements();
9267 unsigned Index = V2->getAsZExtVal();
9268
9269 assert(VT.getScalarSizeInBits() == 1 &&
9270 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9271 assert(ST->hasMVEIntegerOps() &&
9272 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9273
9274 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9275
9276 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9277 // promoted to v8i16, etc.
9278
9279 MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9280
9281 if (NumElts == 2) {
9282 EVT SubVT = MVT::v4i32;
9283 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9284 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9285 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9286 DAG.getIntPtrConstant(i, dl));
9287 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9288 DAG.getConstant(j, dl, MVT::i32));
9289 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9290 DAG.getConstant(j + 1, dl, MVT::i32));
9291 }
9292 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9293 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9294 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9295 }
9296
9297 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9298 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9299 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9300 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9301 DAG.getIntPtrConstant(i, dl));
9302 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9303 DAG.getConstant(j, dl, MVT::i32));
9304 }
9305
9306 // Now return the result of comparing the subvector with zero,
9307 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9308 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9309 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9310}
9311
9312// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9313static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9314 const ARMSubtarget *ST) {
9315 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9316 EVT VT = N->getValueType(0);
9317 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9318 "Expected a vector i1 type!");
9319 SDValue Op = N->getOperand(0);
9320 EVT FromVT = Op.getValueType();
9321 SDLoc DL(N);
9322
9323 SDValue And =
9324 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9325 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9326 DAG.getCondCode(ISD::SETNE));
9327}
9328
9329static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9330 const ARMSubtarget *Subtarget) {
9331 if (!Subtarget->hasMVEIntegerOps())
9332 return SDValue();
9333
9334 EVT ToVT = N->getValueType(0);
9335 if (ToVT.getScalarType() == MVT::i1)
9336 return LowerTruncatei1(N, DAG, Subtarget);
9337
9338 // MVE does not have a single instruction to perform the truncation of a v4i32
9339 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9340 // Most of the instructions in MVE follow the 'Beats' system, where moving
9341 // values from different lanes is usually something that the instructions
9342 // avoid.
9343 //
9344 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9345 // which take the top/bottom half of a larger lane and extend it (or do the
9346 // opposite, truncating into the top/bottom lane from a larger lane). Note
9347 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9348 // bottom 16bits from each vector lane. This works really well with T/B
9349 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9350 // to move order.
9351 //
9352 // But truncates and sext/zext are always going to be fairly common from llvm.
9353 // We have several options for how to deal with them:
9354 // - Wherever possible combine them into an instruction that makes them
9355 // "free". This includes loads/stores, which can perform the trunc as part
9356 // of the memory operation. Or certain shuffles that can be turned into
9357 // VMOVN/VMOVL.
9358 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9359 // trunc(mul(sext(a), sext(b))) may become
9360 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9361 // this case can use VMULL). This is performed in the
9362 // MVELaneInterleavingPass.
9363 // - Otherwise we have an option. By default we would expand the
9364 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9365 // registers. One for each vector lane in the vector. This can obviously be
9366 // very expensive.
9367 // - The other option is to use the fact that loads/stores can extend/truncate
9368 // to turn a trunc into two truncating stack stores and a stack reload. This
9369 // becomes 3 back-to-back memory operations, but at least that is less than
9370 // all the insert/extracts.
9371 //
9372 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9373 // are either optimized where they can be, or eventually lowered into stack
9374 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9375 // too early, where other instructions would be better, and stops us from
9376 // having to reconstruct multiple buildvector shuffles into loads/stores.
9377 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9378 return SDValue();
9379 EVT FromVT = N->getOperand(0).getValueType();
9380 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9381 return SDValue();
9382
9383 SDValue Lo, Hi;
9384 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9385 SDLoc DL(N);
9386 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9387}
9388
9389static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9390 const ARMSubtarget *Subtarget) {
9391 if (!Subtarget->hasMVEIntegerOps())
9392 return SDValue();
9393
9394 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9395
9396 EVT ToVT = N->getValueType(0);
9397 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9398 return SDValue();
9399 SDValue Op = N->getOperand(0);
9400 EVT FromVT = Op.getValueType();
9401 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9402 return SDValue();
9403
9404 SDLoc DL(N);
9405 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9406 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9407 ExtVT = MVT::v8i16;
9408
9409 unsigned Opcode =
9410 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9411 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9412 SDValue Ext1 = Ext.getValue(1);
9413
9414 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9415 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9416 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9417 }
9418
9419 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9420}
9421
9422/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9423/// element has been zero/sign-extended, depending on the isSigned parameter,
9424/// from an integer type half its size.
9425static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9426 bool isSigned) {
9427 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9428 EVT VT = N->getValueType(0);
9429 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9430 SDNode *BVN = N->getOperand(0).getNode();
9431 if (BVN->getValueType(0) != MVT::v4i32 ||
9432 BVN->getOpcode() != ISD::BUILD_VECTOR)
9433 return false;
9434 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9435 unsigned HiElt = 1 - LoElt;
9436 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9437 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9438 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9439 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9440 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9441 return false;
9442 if (isSigned) {
9443 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9444 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9445 return true;
9446 } else {
9447 if (Hi0->isZero() && Hi1->isZero())
9448 return true;
9449 }
9450 return false;
9451 }
9452
9453 if (N->getOpcode() != ISD::BUILD_VECTOR)
9454 return false;
9455
9456 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9457 SDNode *Elt = N->getOperand(i).getNode();
9458 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9459 unsigned EltSize = VT.getScalarSizeInBits();
9460 unsigned HalfSize = EltSize / 2;
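// For example, for a v4i32 BUILD_VECTOR every constant must fit in an i16
// under the requested signedness.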
9461 if (isSigned) {
9462 if (!isIntN(HalfSize, C->getSExtValue()))
9463 return false;
9464 } else {
9465 if (!isUIntN(HalfSize, C->getZExtValue()))
9466 return false;
9467 }
9468 continue;
9469 }
9470 return false;
9471 }
9472
9473 return true;
9474}
9475
9476/// isSignExtended - Check if a node is a vector value that is sign-extended
9477/// or a constant BUILD_VECTOR with sign-extended elements.
9478static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9479 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9480 return true;
9481 if (isExtendedBUILD_VECTOR(N, DAG, true))
9482 return true;
9483 return false;
9484}
9485
9486/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9487/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9488static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9489 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9490 ISD::isZEXTLoad(N))
9491 return true;
9492 if (isExtendedBUILD_VECTOR(N, DAG, false))
9493 return true;
9494 return false;
9495}
9496
9497static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9498 if (OrigVT.getSizeInBits() >= 64)
9499 return OrigVT;
9500
9501 assert(OrigVT.isSimple() && "Expecting a simple value type");
9502
9503 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9504 switch (OrigSimpleTy) {
9505 default: llvm_unreachable("Unexpected Vector Type");
9506 case MVT::v2i8:
9507 case MVT::v2i16:
9508 return MVT::v2i32;
9509 case MVT::v4i8:
9510 return MVT::v4i16;
9511 }
9512}
9513
9514/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9515/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9516/// We insert the required extension here to get the vector to fill a D register.
9517static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9518 const EVT &OrigTy,
9519 const EVT &ExtTy,
9520 unsigned ExtOpcode) {
9521 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9522 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9523 // 64-bits we need to insert a new extension so that it will be 64-bits.
9524 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9525 if (OrigTy.getSizeInBits() >= 64)
9526 return N;
9527
9528 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9529 EVT NewVT = getExtensionTo64Bits(OrigTy);
9530
9531 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9532}
9533
9534/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9535/// does not do any sign/zero extension. If the original vector is less
9536/// than 64 bits, an appropriate extension will be added after the load to
9537/// reach a total size of 64 bits. We have to add the extension separately
9538/// because ARM does not have a sign/zero extending load for vectors.
9539static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9540 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9541
9542 // The load already has the right type.
9543 if (ExtendedTy == LD->getMemoryVT())
9544 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9545 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9546 LD->getMemOperand()->getFlags());
9547
9548 // We need to create a zextload/sextload. We cannot just create a load
9549 // followed by a zext/sext node because LowerMUL is also run during normal
9550 // operation legalization where we can't create illegal types.
9551 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9552 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9553 LD->getMemoryVT(), LD->getAlign(),
9554 LD->getMemOperand()->getFlags());
9555}
9556
9557/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9558/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9559/// the unextended value. The unextended vector should be 64 bits so that it can
9560/// be used as an operand to a VMULL instruction. If the original vector size
9561 /// before extension is less than 64 bits we add an extension to resize
9562/// the vector to 64 bits.
9563static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9564 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9565 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9566 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9567 N->getOperand(0)->getValueType(0),
9568 N->getValueType(0),
9569 N->getOpcode());
9570
9571 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9572 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9573 "Expected extending load");
9574
9575 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9576 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9577 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9578 SDValue extLoad =
9579 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9580 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9581
9582 return newLoad;
9583 }
9584
9585 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9586 // have been legalized as a BITCAST from v4i32.
9587 if (N->getOpcode() == ISD::BITCAST) {
9588 SDNode *BVN = N->getOperand(0).getNode();
9589 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9590 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9591 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9592 return DAG.getBuildVector(
9593 MVT::v2i32, SDLoc(N),
9594 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9595 }
9596 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9597 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9598 EVT VT = N->getValueType(0);
9599 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9600 unsigned NumElts = VT.getVectorNumElements();
9601 MVT TruncVT = MVT::getIntegerVT(EltSize);
9602 SmallVector<SDValue, 8> Ops;
9603 SDLoc dl(N);
9604 for (unsigned i = 0; i != NumElts; ++i) {
9605 const APInt &CInt = N->getConstantOperandAPInt(i);
9606 // Element types smaller than 32 bits are not legal, so use i32 elements.
9607 // The values are implicitly truncated so sext vs. zext doesn't matter.
9608 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9609 }
9610 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9611}
9612
9613static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9614 unsigned Opcode = N->getOpcode();
9615 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9616 SDNode *N0 = N->getOperand(0).getNode();
9617 SDNode *N1 = N->getOperand(1).getNode();
9618 return N0->hasOneUse() && N1->hasOneUse() &&
9619 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9620 }
9621 return false;
9622}
9623
9624static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9625 unsigned Opcode = N->getOpcode();
9626 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9627 SDNode *N0 = N->getOperand(0).getNode();
9628 SDNode *N1 = N->getOperand(1).getNode();
9629 return N0->hasOneUse() && N1->hasOneUse() &&
9630 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9631 }
9632 return false;
9633}
9634
9635static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9636 // Multiplications are only custom-lowered for 128-bit vectors so that
9637 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9638 EVT VT = Op.getValueType();
9639 assert(VT.is128BitVector() && VT.isInteger() &&
9640 "unexpected type for custom-lowering ISD::MUL");
9641 SDNode *N0 = Op.getOperand(0).getNode();
9642 SDNode *N1 = Op.getOperand(1).getNode();
9643 unsigned NewOpc = 0;
9644 bool isMLA = false;
9645 bool isN0SExt = isSignExtended(N0, DAG);
9646 bool isN1SExt = isSignExtended(N1, DAG);
9647 if (isN0SExt && isN1SExt)
9648 NewOpc = ARMISD::VMULLs;
9649 else {
9650 bool isN0ZExt = isZeroExtended(N0, DAG);
9651 bool isN1ZExt = isZeroExtended(N1, DAG);
9652 if (isN0ZExt && isN1ZExt)
9653 NewOpc = ARMISD::VMULLu;
9654 else if (isN1SExt || isN1ZExt) {
9655 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9656 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9657 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9658 NewOpc = ARMISD::VMULLs;
9659 isMLA = true;
9660 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9661 NewOpc = ARMISD::VMULLu;
9662 isMLA = true;
9663 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9664 std::swap(N0, N1);
9665 NewOpc = ARMISD::VMULLu;
9666 isMLA = true;
9667 }
9668 }
9669
9670 if (!NewOpc) {
9671 if (VT == MVT::v2i64)
9672 // Fall through to expand this. It is not legal.
9673 return SDValue();
9674 else
9675 // Other vector multiplications are legal.
9676 return Op;
9677 }
9678 }
9679
9680 // Legalize to a VMULL instruction.
9681 SDLoc DL(Op);
9682 SDValue Op0;
9683 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9684 if (!isMLA) {
9685 Op0 = SkipExtensionForVMULL(N0, DAG);
9686 assert(Op0.getValueType().is64BitVector() &&
9687 Op1.getValueType().is64BitVector() &&
9688 "unexpected types for extended operands to VMULL");
9689 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9690 }
9691
9692 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9693 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9694 // vmull q0, d4, d6
9695 // vmlal q0, d5, d6
9696 // is faster than
9697 // vaddl q0, d4, d5
9698 // vmovl q1, d6
9699 // vmul q0, q0, q1
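// i.e. each half of the extended add/sub is multiplied by Op1 with its own
// VMULL and the two products are recombined with the original add/sub.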
9700 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9701 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9702 EVT Op1VT = Op1.getValueType();
9703 return DAG.getNode(N0->getOpcode(), DL, VT,
9704 DAG.getNode(NewOpc, DL, VT,
9705 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9706 DAG.getNode(NewOpc, DL, VT,
9707 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9708}
9709
9710static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9711 SelectionDAG &DAG) {
9712 // TODO: Should this propagate fast-math-flags?
9713
9714 // Convert to float
9715 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9716 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9717 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9718 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9719 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9720 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9721 // Get reciprocal estimate.
9722 // float4 recip = vrecpeq_f32(yf);
9723 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9724 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9725 Y);
9726 // Because char has a smaller range than uchar, we can actually get away
9727 // without any newton steps. This requires that we use a weird bias
9728 // of 0xb000, however (again, this has been exhaustively tested).
9729 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9730 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9731 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9732 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9733 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9734 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9735 // Convert back to short.
9736 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9737 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9738 return X;
9739}
9740
9741static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9742 SelectionDAG &DAG) {
9743 // TODO: Should this propagate fast-math-flags?
9744
9745 SDValue N2;
9746 // Convert to float.
9747 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9748 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9749 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9750 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9751 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9752 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9753
9754 // Use reciprocal estimate and one refinement step.
9755 // float4 recip = vrecpeq_f32(yf);
9756 // recip *= vrecpsq_f32(yf, recip);
9757 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9758 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9759 N1);
9760 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9761 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9762 N1, N2);
9763 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9764 // Because short has a smaller range than ushort, we can actually get away
9765 // with only a single newton step. This requires that we use a weird bias
9766 // of 0x89, however (again, this has been exhaustively tested).
9767 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9768 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9769 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9770 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9771 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9772 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9773 // Convert back to integer and return.
9774 // return vmovn_s32(vcvt_s32_f32(result));
9775 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9776 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9777 return N0;
9778}
9779
9780static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9781 const ARMSubtarget *ST) {
9782 EVT VT = Op.getValueType();
9783 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9784 "unexpected type for custom-lowering ISD::SDIV");
9785
9786 SDLoc dl(Op);
9787 SDValue N0 = Op.getOperand(0);
9788 SDValue N1 = Op.getOperand(1);
9789 SDValue N2, N3;
9790
9791 if (VT == MVT::v8i8) {
9792 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9793 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9794
9795 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9796 DAG.getIntPtrConstant(4, dl));
9797 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9798 DAG.getIntPtrConstant(4, dl));
9799 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9800 DAG.getIntPtrConstant(0, dl));
9801 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9802 DAG.getIntPtrConstant(0, dl));
9803
9804 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9805 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9806
9807 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9808 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9809
9810 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9811 return N0;
9812 }
9813 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9814}
9815
9816static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9817 const ARMSubtarget *ST) {
9818 // TODO: Should this propagate fast-math-flags?
9819 EVT VT = Op.getValueType();
9820 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9821 "unexpected type for custom-lowering ISD::UDIV");
9822
9823 SDLoc dl(Op);
9824 SDValue N0 = Op.getOperand(0);
9825 SDValue N1 = Op.getOperand(1);
9826 SDValue N2, N3;
9827
9828 if (VT == MVT::v8i8) {
9829 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9830 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9831
9832 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9833 DAG.getIntPtrConstant(4, dl));
9834 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9835 DAG.getIntPtrConstant(4, dl));
9836 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9837 DAG.getIntPtrConstant(0, dl));
9838 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9839 DAG.getIntPtrConstant(0, dl));
9840
9841 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9842 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9843
9844 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9845 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9846
9847 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9848 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9849 MVT::i32),
9850 N0);
9851 return N0;
9852 }
9853
9854 // v4i16 udiv ... Convert to float.
9855 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9856 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9857 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9858 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9859 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9860 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9861
9862 // Use reciprocal estimate and two refinement steps.
9863 // float4 recip = vrecpeq_f32(yf);
9864 // recip *= vrecpsq_f32(yf, recip);
9865 // recip *= vrecpsq_f32(yf, recip);
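  // Note: vrecps(d, r) computes (2.0 - d * r), so each "recip *= vrecps(yf,
  // recip)" step below is one Newton-Raphson iteration r <- r * (2 - d * r)
  // toward 1/d; two such steps refine the coarse vrecpe estimate enough for
  // the biased result below to round to the correct 16-bit quotient.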
9866 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9867 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9868 BN1);
9869 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9870 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9871 BN1, N2);
9872 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9873 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9874 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9875 BN1, N2);
9876 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9877 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9878 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9879 // and that it will never cause us to return an answer too large).
9880 // float4 result = as_float4(as_int4(xf*recip) + 2);
9881 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9882 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9883 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9884 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9885 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9886 // Convert back to integer and return.
9887 // return vmovn_u32(vcvt_s32_f32(result));
9888 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9889 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9890 return N0;
9891}
9892
9893static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9894 SDNode *N = Op.getNode();
9895 EVT VT = N->getValueType(0);
9896 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9897
9898 SDValue Carry = Op.getOperand(2);
9899
9900 SDLoc DL(Op);
9901
9902 SDValue Result;
9903 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9904 // This converts the boolean value carry into the carry flag.
9905 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9906
9907 // Do the addition proper using the carry flag we wanted.
9908 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9909 Op.getOperand(1), Carry);
9910
9911 // Now convert the carry flag into a boolean value.
9912 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9913 } else {
9914 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
9915 // have to invert the carry first.
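  // (On ARM the flags carry is the inverse of a borrow: SBC subtracts an
  // extra 1 only when the C flag is clear, so a borrow b becomes carry 1 - b.)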
9916 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9917 DAG.getConstant(1, DL, MVT::i32), Carry);
9918 // This converts the boolean value carry into the carry flag.
9919 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9920
9921 // Do the subtraction proper using the carry flag we wanted.
9922 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9923 Op.getOperand(1), Carry);
9924
9925 // Now convert the carry flag into a boolean value.
9926 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9927 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9928 // by ISD::USUBO_CARRY, so compute 1 - C.
9929 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9930 DAG.getConstant(1, DL, MVT::i32), Carry);
9931 }
9932
9933 // Return both values.
9934 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9935}
9936
9937SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9938 assert(Subtarget->isTargetDarwin());
9939
9940 // For iOS, we want to call an alternative entry point: __sincos_stret,
9941 // return values are passed via sret.
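  // (When the target uses AAPCS the {sin, cos} pair comes back directly as
  // the call result, so the sret stack slot below is only created for
  // APCS-ABI targets, where the results are stored to memory and reloaded.)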
9942 SDLoc dl(Op);
9943 SDValue Arg = Op.getOperand(0);
9944 EVT ArgVT = Arg.getValueType();
9945 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9946 auto PtrVT = getPointerTy(DAG.getDataLayout());
9947
9949 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9950
9951 // Pair of floats / doubles used to pass the result.
9952 Type *RetTy = StructType::get(ArgTy, ArgTy);
9953 auto &DL = DAG.getDataLayout();
9954
9956 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9957 SDValue SRet;
9958 if (ShouldUseSRet) {
9959 // Create stack object for sret.
9960 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9961 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9962 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9963 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9964
9965 ArgListEntry Entry;
9966 Entry.Node = SRet;
9967 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9968 Entry.IsSExt = false;
9969 Entry.IsZExt = false;
9970 Entry.IsSRet = true;
9971 Args.push_back(Entry);
9973 }
9974
9975 ArgListEntry Entry;
9976 Entry.Node = Arg;
9977 Entry.Ty = ArgTy;
9978 Entry.IsSExt = false;
9979 Entry.IsZExt = false;
9980 Args.push_back(Entry);
9981
9982 RTLIB::Libcall LC =
9983 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9984 const char *LibcallName = getLibcallName(LC);
9986 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9987
9989 CLI.setDebugLoc(dl)
9990 .setChain(DAG.getEntryNode())
9991 .setCallee(CC, RetTy, Callee, std::move(Args))
9992 .setDiscardResult(ShouldUseSRet);
9993 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9994
9995 if (!ShouldUseSRet)
9996 return CallResult.first;
9997
9998 SDValue LoadSin =
9999 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
10000
10001 // Address of cos field.
10002 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
10003 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
10004 SDValue LoadCos =
10005 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
10006
10007 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
10008 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
10009 LoadSin.getValue(0), LoadCos.getValue(0));
10010}
10011
10012SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
10013 bool Signed,
10014 SDValue &Chain) const {
10015 EVT VT = Op.getValueType();
10016 assert((VT == MVT::i32 || VT == MVT::i64) &&
10017 "unexpected type for custom lowering DIV");
10018 SDLoc dl(Op);
10019
10020 const auto &DL = DAG.getDataLayout();
10021 const auto &TLI = DAG.getTargetLoweringInfo();
10022
10023 const char *Name = nullptr;
10024 if (Signed)
10025 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
10026 else
10027 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
10028
10030
10032
10033 for (auto AI : {1, 0}) {
10034 ArgListEntry Arg;
10035 Arg.Node = Op.getOperand(AI);
10036 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10037 Args.push_back(Arg);
10038 }
10039
10040 CallLoweringInfo CLI(DAG);
10041 CLI.setDebugLoc(dl)
10042 .setChain(Chain)
10044 ES, std::move(Args));
10045
10046 return LowerCallTo(CLI).first;
10047}
10048
10049// This is a code size optimisation: return the original SDIV node to
10050// DAGCombiner when we don't want to expand SDIV into a sequence of
10051// instructions, and an empty node otherwise which will cause the
10052// SDIV to be expanded in DAGCombine.
10053SDValue
10054ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10055 SelectionDAG &DAG,
10056 SmallVectorImpl<SDNode *> &Created) const {
10057 // TODO: Support SREM
10058 if (N->getOpcode() != ISD::SDIV)
10059 return SDValue();
10060
10061 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10062 const bool MinSize = ST.hasMinSize();
10063 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10064 : ST.hasDivideInARMMode();
10065
10066 // Don't touch vector types; rewriting this may lead to scalarizing
10067 // the int divs.
10068 if (N->getOperand(0).getValueType().isVector())
10069 return SDValue();
10070
10071 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
10072 // hwdiv support for this to be really profitable.
10073 if (!(MinSize && HasDivide))
10074 return SDValue();
10075
10076 // ARM mode is a bit simpler than Thumb: we can handle large power
10077 // of 2 immediates with 1 mov instruction; no further checks required,
10078 // just return the sdiv node.
10079 if (!ST.isThumb())
10080 return SDValue(N, 0);
10081
10082 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10083 // and thus lose the code size benefits of a MOVS that requires only 2.
10084 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10085 // but as it's doing exactly this, it's not worth the trouble to get TTI.
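  // For example, at minsize a division by 128 keeps the SDIV node (the
  // immediate fits a 2-byte MOVS), whereas a divisor of 256 would already
  // need a 4-byte MOV, so we return an empty node below and let DAGCombine
  // emit the usual shift-based power-of-two expansion instead.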
10086 if (Divisor.sgt(128))
10087 return SDValue();
10088
10089 return SDValue(N, 0);
10090}
10091
10092SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10093 bool Signed) const {
10094 assert(Op.getValueType() == MVT::i32 &&
10095 "unexpected type for custom lowering DIV");
10096 SDLoc dl(Op);
10097
10098 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10099 DAG.getEntryNode(), Op.getOperand(1));
10100
10101 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10102}
10103
10104static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
10105 SDLoc DL(N);
10106 SDValue Op = N->getOperand(1);
10107 if (N->getValueType(0) == MVT::i32)
10108 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10109 SDValue Lo, Hi;
10110 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10111 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10112 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10113}
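// Note: for the i64 case the two 32-bit halves are OR'ed together first, so
// the WIN__DBZCHK divide-by-zero check sees a non-zero value exactly when the
// full 64-bit denominator is non-zero.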
10114
10115void ARMTargetLowering::ExpandDIV_Windows(
10116 SDValue Op, SelectionDAG &DAG, bool Signed,
10118 const auto &DL = DAG.getDataLayout();
10119 const auto &TLI = DAG.getTargetLoweringInfo();
10120
10121 assert(Op.getValueType() == MVT::i64 &&
10122 "unexpected type for custom lowering DIV");
10123 SDLoc dl(Op);
10124
10125 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10126
10127 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10128
10129 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10130 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10131 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10132 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10133
10134 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10135}
10136
10137static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10138 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10139 EVT MemVT = LD->getMemoryVT();
10140 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10141 MemVT == MVT::v16i1) &&
10142 "Expected a predicate type!");
10143 assert(MemVT == Op.getValueType());
10144 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10145 "Expected a non-extending load");
10146 assert(LD->isUnindexed() && "Expected a unindexed load");
10147
10148 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit
10149 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10150 // need to make sure that 8/4/2 bits are actually loaded into the correct
10151 // place, which means loading the value and then shuffling the values into
10152 // the bottom bits of the predicate.
10153 // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect
10154 // for BE).
10155 // Speaking of BE, apparently the rest of llvm will assume a reverse order to
10156 // a natural VMSR(load), so needs to be reversed.
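  // (Illustration: a v4i1 predicate occupies the whole 16-bit predicate
  // register, with each element's bit spread across 4 consecutive bit
  // positions, which is why the loaded value is cast to v16i1 below and the
  // requested narrower predicate type is then extracted starting at lane 0.)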
10157
10158 SDLoc dl(Op);
10159 SDValue Load = DAG.getExtLoad(
10160 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10162 LD->getMemOperand());
10163 SDValue Val = Load;
10164 if (DAG.getDataLayout().isBigEndian())
10165 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10166 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10167 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10168 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10169 if (MemVT != MVT::v16i1)
10170 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10171 DAG.getConstant(0, dl, MVT::i32));
10172 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10173}
10174
10175void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10176 SelectionDAG &DAG) const {
10177 LoadSDNode *LD = cast<LoadSDNode>(N);
10178 EVT MemVT = LD->getMemoryVT();
10179 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10180
10181 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10182 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10183 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10184 SDLoc dl(N);
10186 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10187 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10188 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10189 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10190 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10191 Results.append({Pair, Result.getValue(2)});
10192 }
10193}
10194
10195static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10196 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10197 EVT MemVT = ST->getMemoryVT();
10198 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10199 MemVT == MVT::v16i1) &&
10200 "Expected a predicate type!");
10201 assert(MemVT == ST->getValue().getValueType());
10202 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10203 assert(ST->isUnindexed() && "Expected a unindexed store");
10204
10205 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10206 // top bits unset and a scalar store.
10207 SDLoc dl(Op);
10208 SDValue Build = ST->getValue();
10209 if (MemVT != MVT::v16i1) {
10211 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10212 unsigned Elt = DAG.getDataLayout().isBigEndian()
10213 ? MemVT.getVectorNumElements() - I - 1
10214 : I;
10215 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10216 DAG.getConstant(Elt, dl, MVT::i32)));
10217 }
10218 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10219 Ops.push_back(DAG.getUNDEF(MVT::i32));
10220 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10221 }
10222 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10223 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10224 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10225 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10226 DAG.getConstant(16, dl, MVT::i32));
10227 return DAG.getTruncStore(
10228 ST->getChain(), dl, GRP, ST->getBasePtr(),
10230 ST->getMemOperand());
10231}
10232
10233static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10234 const ARMSubtarget *Subtarget) {
10235 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10236 EVT MemVT = ST->getMemoryVT();
10237 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10238
10239 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10240 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10241 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10242 SDNode *N = Op.getNode();
10243 SDLoc dl(N);
10244
10245 SDValue Lo = DAG.getNode(
10246 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10247 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10248 MVT::i32));
10249 SDValue Hi = DAG.getNode(
10250 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10251 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10252 MVT::i32));
10253
10254 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10255 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10256 MemVT, ST->getMemOperand());
10257 } else if (Subtarget->hasMVEIntegerOps() &&
10258 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10259 MemVT == MVT::v16i1))) {
10260 return LowerPredicateStore(Op, DAG);
10261 }
10262
10263 return SDValue();
10264}
10265
10266static bool isZeroVector(SDValue N) {
10267 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10268 (N->getOpcode() == ARMISD::VMOVIMM &&
10269 isNullConstant(N->getOperand(0))));
10270}
10271
10272static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10273 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10274 MVT VT = Op.getSimpleValueType();
10275 SDValue Mask = N->getMask();
10276 SDValue PassThru = N->getPassThru();
10277 SDLoc dl(Op);
10278
10279 if (isZeroVector(PassThru))
10280 return Op;
10281
10282 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10283 // zero too, and other values are lowered to a select.
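  // (So a masked load with a real pass-through becomes a masked load that
  // zeros the disabled lanes, followed by a vector select between the loaded
  // value and the original pass-through under the same mask.)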
10284 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10285 DAG.getTargetConstant(0, dl, MVT::i32));
10286 SDValue NewLoad = DAG.getMaskedLoad(
10287 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10288 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10289 N->getExtensionType(), N->isExpandingLoad());
10290 SDValue Combo = NewLoad;
10291 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10292 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10293 isZeroVector(PassThru->getOperand(0));
10294 if (!PassThru.isUndef() && !PassThruIsCastZero)
10295 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10296 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10297}
10298
10299static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10300 const ARMSubtarget *ST) {
10301 if (!ST->hasMVEIntegerOps())
10302 return SDValue();
10303
10304 SDLoc dl(Op);
10305 unsigned BaseOpcode = 0;
10306 switch (Op->getOpcode()) {
10307 default: llvm_unreachable("Expected VECREDUCE opcode");
10308 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10309 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10310 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10311 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10312 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10313 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10314 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10315 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10316 }
10317
10318 SDValue Op0 = Op->getOperand(0);
10319 EVT VT = Op0.getValueType();
10320 EVT EltVT = VT.getVectorElementType();
10321 unsigned NumElts = VT.getVectorNumElements();
10322 unsigned NumActiveLanes = NumElts;
10323
10324 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10325 NumActiveLanes == 2) &&
10326 "Only expected a power 2 vector size");
10327
10328 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10329 // allows us to easily extract vector elements from the lanes.
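  // For example, for a v8i16 multiply reduction a single VREV32.16 step
  // leaves lane 2k holding x[2k]*x[2k+1]; the four surviving products are
  // then extracted (lanes 0, 2, 4 and 6 below) and combined pairwise.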
10330 while (NumActiveLanes > 4) {
10331 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10332 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10333 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10334 NumActiveLanes /= 2;
10335 }
10336
10337 SDValue Res;
10338 if (NumActiveLanes == 4) {
10339 // The remaining 4 elements are summed sequentially
10340 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10341 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10342 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10343 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10344 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10345 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10346 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10347 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10348 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10349 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10350 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10351 } else {
10352 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10353 DAG.getConstant(0, dl, MVT::i32));
10354 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10355 DAG.getConstant(1, dl, MVT::i32));
10356 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10357 }
10358
10359 // Result type may be wider than element type.
10360 if (EltVT != Op->getValueType(0))
10361 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10362 return Res;
10363}
10364
10365static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10366 const ARMSubtarget *ST) {
10367 if (!ST->hasMVEFloatOps())
10368 return SDValue();
10369 return LowerVecReduce(Op, DAG, ST);
10370}
10371
10372static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10373 const ARMSubtarget *ST) {
10374 if (!ST->hasNEON())
10375 return SDValue();
10376
10377 SDLoc dl(Op);
10378 SDValue Op0 = Op->getOperand(0);
10379 EVT VT = Op0.getValueType();
10380 EVT EltVT = VT.getVectorElementType();
10381
10382 unsigned PairwiseIntrinsic = 0;
10383 switch (Op->getOpcode()) {
10384 default:
10385 llvm_unreachable("Expected VECREDUCE opcode");
10387 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10388 break;
10390 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10391 break;
10393 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10394 break;
10396 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10397 break;
10398 }
10399 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10400
10401 unsigned NumElts = VT.getVectorNumElements();
10402 unsigned NumActiveLanes = NumElts;
10403
10404 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10405 NumActiveLanes == 2) &&
10406 "Only expected a power 2 vector size");
10407
10408 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10409 if (VT.is128BitVector()) {
10410 SDValue Lo, Hi;
10411 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10412 VT = Lo.getValueType();
10413 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10414 NumActiveLanes /= 2;
10415 }
10416
10417 // Use pairwise reductions until one lane remains
10418 while (NumActiveLanes > 1) {
10419 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10420 NumActiveLanes /= 2;
10421 }
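  // Each NEON pairwise min/max combines adjacent lanes (lane i of the low
  // half is op(v[2i], v[2i+1])), so with both operands set to Op0,
  // log2(#lanes) iterations funnel the overall result into lane 0, which is
  // extracted below.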
10422
10423 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10424 DAG.getConstant(0, dl, MVT::i32));
10425
10426 // Result type may be wider than element type.
10427 if (EltVT != Op.getValueType()) {
10428 unsigned Extend = 0;
10429 switch (Op->getOpcode()) {
10430 default:
10431 llvm_unreachable("Expected VECREDUCE opcode");
10434 Extend = ISD::ZERO_EXTEND;
10435 break;
10438 Extend = ISD::SIGN_EXTEND;
10439 break;
10440 }
10441 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10442 }
10443 return Res;
10444}
10445
10446static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10447 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10448 // Acquire/Release load/store is not legal for targets without a dmb or
10449 // equivalent available.
10450 return SDValue();
10451
10452 // Monotonic load/store is legal for all targets.
10453 return Op;
10454}
10455
10456static void ReplaceREADCYCLECOUNTER(SDNode *N,
10457 SmallVectorImpl<SDValue> &Results,
10458 SelectionDAG &DAG,
10459 const ARMSubtarget *Subtarget) {
10460 SDLoc DL(N);
10461 // Under Power Management extensions, the cycle-count is:
10462 // mrc p15, #0, <Rt>, c9, c13, #0
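  // (c9, c13, 0 in coprocessor 15 is PMCCNTR, the PMU cycle counter; only
  // 32 bits are read, so the i64 result is built with a zero high word.)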
10463 SDValue Ops[] = { N->getOperand(0), // Chain
10464 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10465 DAG.getTargetConstant(15, DL, MVT::i32),
10466 DAG.getTargetConstant(0, DL, MVT::i32),
10467 DAG.getTargetConstant(9, DL, MVT::i32),
10468 DAG.getTargetConstant(13, DL, MVT::i32),
10469 DAG.getTargetConstant(0, DL, MVT::i32)
10470 };
10471
10472 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10473 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10474 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10475 DAG.getConstant(0, DL, MVT::i32)));
10476 Results.push_back(Cycles32.getValue(1));
10477}
10478
10479static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10480 SDLoc dl(V.getNode());
10481 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10482 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10483 if (isBigEndian)
10484 std::swap (VLo, VHi);
10485 SDValue RegClass =
10486 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10487 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10488 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10489 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10490 return SDValue(
10491 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10492}
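// The REG_SEQUENCE above packs the two 32-bit halves into a single GPRPair
// operand (subregisters gsub_0 and gsub_1, swapped for big-endian), which is
// the form the CMP_SWAP_64 pseudo used just below expects.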
10493
10496 SelectionDAG &DAG) {
10497 assert(N->getValueType(0) == MVT::i64 &&
10498 "AtomicCmpSwap on types less than 64 should be legal");
10499 SDValue Ops[] = {N->getOperand(1),
10500 createGPRPairNode(DAG, N->getOperand(2)),
10501 createGPRPairNode(DAG, N->getOperand(3)),
10502 N->getOperand(0)};
10503 SDNode *CmpSwap = DAG.getMachineNode(
10504 ARM::CMP_SWAP_64, SDLoc(N),
10505 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10506
10507 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10508 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10509
10510 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10511
10512 SDValue Lo =
10513 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10514 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10515 SDValue Hi =
10516 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10517 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10518 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10519 Results.push_back(SDValue(CmpSwap, 2));
10520}
10521
10522SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10523 SDLoc dl(Op);
10524 EVT VT = Op.getValueType();
10525 SDValue Chain = Op.getOperand(0);
10526 SDValue LHS = Op.getOperand(1);
10527 SDValue RHS = Op.getOperand(2);
10528 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10529 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10530
10531 // If we don't have instructions of this float type then soften to a libcall
10532 // and use SETCC instead.
10533 if (isUnsupportedFloatingType(LHS.getValueType())) {
10535 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10536 if (!RHS.getNode()) {
10537 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10538 CC = ISD::SETNE;
10539 }
10540 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10541 DAG.getCondCode(CC));
10542 return DAG.getMergeValues({Result, Chain}, dl);
10543 }
10544
10545 ARMCC::CondCodes CondCode, CondCode2;
10546 FPCCToARMCC(CC, CondCode, CondCode2);
10547
10548 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10549 // in CMPFP and CMPFPE, but instead it should be made explicit by these
10550 // instructions using a chain instead of glue. This would also fix the problem
10551 // here (and also in LowerSELECT_CC) where we generate two comparisons when
10552 // CondCode2 != AL.
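  // (A second check is needed for predicates with no single ARM condition,
  // e.g. "unordered or equal" is tested as EQ plus VS, since the VFP compare
  // sets the V flag when either operand is a NaN.)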
10553 SDValue True = DAG.getConstant(1, dl, VT);
10554 SDValue False = DAG.getConstant(0, dl, VT);
10555 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10556 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10557 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10558 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10559 if (CondCode2 != ARMCC::AL) {
10560 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10561 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10562 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10563 }
10564 return DAG.getMergeValues({Result, Chain}, dl);
10565}
10566
10567SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10569
10570 EVT VT = getPointerTy(DAG.getDataLayout());
10571 SDLoc DL(Op);
10572 int FI = MFI.CreateFixedObject(4, 0, false);
10573 return DAG.getFrameIndex(FI, VT);
10574}
10575
10576SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10577 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10578 switch (Op.getOpcode()) {
10579 default: llvm_unreachable("Don't know how to custom lower this!");
10580 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10581 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10582 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10583 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10584 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10585 case ISD::SELECT: return LowerSELECT(Op, DAG);
10586 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10587 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10588 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10589 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10590 case ISD::VASTART: return LowerVASTART(Op, DAG);
10591 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10592 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10593 case ISD::SINT_TO_FP:
10594 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10597 case ISD::FP_TO_SINT:
10598 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10600 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10601 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10602 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10603 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10604 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10605 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10606 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10607 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10608 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10609 Subtarget);
10610 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10611 case ISD::SHL:
10612 case ISD::SRL:
10613 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10614 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10615 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10616 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10617 case ISD::SRL_PARTS:
10618 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10619 case ISD::CTTZ:
10620 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10621 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10622 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10623 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10624 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10625 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10626 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10627 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10628 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10629 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10630 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10631 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10632 case ISD::SIGN_EXTEND:
10633 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10634 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10635 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10636 case ISD::SET_FPMODE:
10637 return LowerSET_FPMODE(Op, DAG);
10638 case ISD::RESET_FPMODE:
10639 return LowerRESET_FPMODE(Op, DAG);
10640 case ISD::MUL: return LowerMUL(Op, DAG);
10641 case ISD::SDIV:
10642 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10643 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10644 return LowerSDIV(Op, DAG, Subtarget);
10645 case ISD::UDIV:
10646 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10647 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10648 return LowerUDIV(Op, DAG, Subtarget);
10649 case ISD::UADDO_CARRY:
10650 case ISD::USUBO_CARRY:
10651 return LowerUADDSUBO_CARRY(Op, DAG);
10652 case ISD::SADDO:
10653 case ISD::SSUBO:
10654 return LowerSignedALUO(Op, DAG);
10655 case ISD::UADDO:
10656 case ISD::USUBO:
10657 return LowerUnsignedALUO(Op, DAG);
10658 case ISD::SADDSAT:
10659 case ISD::SSUBSAT:
10660 case ISD::UADDSAT:
10661 case ISD::USUBSAT:
10662 return LowerADDSUBSAT(Op, DAG, Subtarget);
10663 case ISD::LOAD:
10664 return LowerPredicateLoad(Op, DAG);
10665 case ISD::STORE:
10666 return LowerSTORE(Op, DAG, Subtarget);
10667 case ISD::MLOAD:
10668 return LowerMLOAD(Op, DAG);
10669 case ISD::VECREDUCE_MUL:
10670 case ISD::VECREDUCE_AND:
10671 case ISD::VECREDUCE_OR:
10672 case ISD::VECREDUCE_XOR:
10673 return LowerVecReduce(Op, DAG, Subtarget);
10678 return LowerVecReduceF(Op, DAG, Subtarget);
10683 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10684 case ISD::ATOMIC_LOAD:
10685 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10686 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10687 case ISD::SDIVREM:
10688 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10690 if (Subtarget->isTargetWindows())
10691 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10692 llvm_unreachable("Don't know how to custom lower this!");
10694 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10696 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10697 case ISD::STRICT_FSETCC:
10698 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10699 case ISD::SPONENTRY:
10700 return LowerSPONENTRY(Op, DAG);
10701 case ARMISD::WIN__DBZCHK: return SDValue();
10702 }
10703}
10704
10705static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10706 SelectionDAG &DAG) {
10707 unsigned IntNo = N->getConstantOperandVal(0);
10708 unsigned Opc = 0;
10709 if (IntNo == Intrinsic::arm_smlald)
10710 Opc = ARMISD::SMLALD;
10711 else if (IntNo == Intrinsic::arm_smlaldx)
10712 Opc = ARMISD::SMLALDX;
10713 else if (IntNo == Intrinsic::arm_smlsld)
10714 Opc = ARMISD::SMLSLD;
10715 else if (IntNo == Intrinsic::arm_smlsldx)
10716 Opc = ARMISD::SMLSLDX;
10717 else
10718 return;
10719
10720 SDLoc dl(N);
10721 SDValue Lo, Hi;
10722 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10723
10724 SDValue LongMul = DAG.getNode(Opc, dl,
10725 DAG.getVTList(MVT::i32, MVT::i32),
10726 N->getOperand(1), N->getOperand(2),
10727 Lo, Hi);
10728 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10729 LongMul.getValue(0), LongMul.getValue(1)));
10730}
10731
10732/// ReplaceNodeResults - Replace the results of node with an illegal result
10733/// type with new values built out of custom code.
10734void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10735 SmallVectorImpl<SDValue> &Results,
10736 SelectionDAG &DAG) const {
10737 SDValue Res;
10738 switch (N->getOpcode()) {
10739 default:
10740 llvm_unreachable("Don't know how to custom expand this!");
10741 case ISD::READ_REGISTER:
10743 break;
10744 case ISD::BITCAST:
10745 Res = ExpandBITCAST(N, DAG, Subtarget);
10746 break;
10747 case ISD::SRL:
10748 case ISD::SRA:
10749 case ISD::SHL:
10750 Res = Expand64BitShift(N, DAG, Subtarget);
10751 break;
10752 case ISD::SREM:
10753 case ISD::UREM:
10754 Res = LowerREM(N, DAG);
10755 break;
10756 case ISD::SDIVREM:
10757 case ISD::UDIVREM:
10758 Res = LowerDivRem(SDValue(N, 0), DAG);
10759 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10760 Results.push_back(Res.getValue(0));
10761 Results.push_back(Res.getValue(1));
10762 return;
10763 case ISD::SADDSAT:
10764 case ISD::SSUBSAT:
10765 case ISD::UADDSAT:
10766 case ISD::USUBSAT:
10767 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10768 break;
10770 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10771 return;
10772 case ISD::UDIV:
10773 case ISD::SDIV:
10774 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10775 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10776 Results);
10779 return;
10781 return ReplaceLongIntrinsic(N, Results, DAG);
10782 case ISD::LOAD:
10783 LowerLOAD(N, Results, DAG);
10784 break;
10785 case ISD::TRUNCATE:
10786 Res = LowerTruncate(N, DAG, Subtarget);
10787 break;
10788 case ISD::SIGN_EXTEND:
10789 case ISD::ZERO_EXTEND:
10790 Res = LowerVectorExtend(N, DAG, Subtarget);
10791 break;
10794 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10795 break;
10796 }
10797 if (Res.getNode())
10798 Results.push_back(Res);
10799}
10800
10801//===----------------------------------------------------------------------===//
10802// ARM Scheduler Hooks
10803//===----------------------------------------------------------------------===//
10804
10805/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10806/// registers the function context.
10807void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10808 MachineBasicBlock *MBB,
10809 MachineBasicBlock *DispatchBB,
10810 int FI) const {
10811 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10812 "ROPI/RWPI not currently supported with SjLj");
10813 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10814 DebugLoc dl = MI.getDebugLoc();
10815 MachineFunction *MF = MBB->getParent();
10819 const Function &F = MF->getFunction();
10820
10821 bool isThumb = Subtarget->isThumb();
10822 bool isThumb2 = Subtarget->isThumb2();
10823
10824 unsigned PCLabelId = AFI->createPICLabelUId();
10825 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10827 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10828 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10829
10830 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10831 : &ARM::GPRRegClass;
10832
10833 // Grab constant pool and fixed stack memory operands.
10834 MachineMemOperand *CPMMO =
10837
10838 MachineMemOperand *FIMMOSt =
10841
10842 // Load the address of the dispatch MBB into the jump buffer.
10843 if (isThumb2) {
10844 // Incoming value: jbuf
10845 // ldr.n r5, LCPI1_1
10846 // orr r5, r5, #1
10847 // add r5, pc
10848 // str r5, [$jbuf, #+4] ; &jbuf[1]
10849 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10850 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10852 .addMemOperand(CPMMO)
10854 // Set the low bit because of thumb mode.
10855 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10856 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10857 .addReg(NewVReg1, RegState::Kill)
10858 .addImm(0x01)
10860 .add(condCodeOp());
10861 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10862 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10863 .addReg(NewVReg2, RegState::Kill)
10864 .addImm(PCLabelId);
10865 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10866 .addReg(NewVReg3, RegState::Kill)
10867 .addFrameIndex(FI)
10868 .addImm(36) // &jbuf[1] :: pc
10869 .addMemOperand(FIMMOSt)
10871 } else if (isThumb) {
10872 // Incoming value: jbuf
10873 // ldr.n r1, LCPI1_4
10874 // add r1, pc
10875 // mov r2, #1
10876 // orrs r1, r2
10877 // add r2, $jbuf, #+4 ; &jbuf[1]
10878 // str r1, [r2]
10879 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10880 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10882 .addMemOperand(CPMMO)
10884 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10885 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10886 .addReg(NewVReg1, RegState::Kill)
10887 .addImm(PCLabelId);
10888 // Set the low bit because of thumb mode.
10889 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10890 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10891 .addReg(ARM::CPSR, RegState::Define)
10892 .addImm(1)
10894 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10895 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10896 .addReg(ARM::CPSR, RegState::Define)
10897 .addReg(NewVReg2, RegState::Kill)
10898 .addReg(NewVReg3, RegState::Kill)
10900 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10901 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10902 .addFrameIndex(FI)
10903 .addImm(36); // &jbuf[1] :: pc
10904 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10905 .addReg(NewVReg4, RegState::Kill)
10906 .addReg(NewVReg5, RegState::Kill)
10907 .addImm(0)
10908 .addMemOperand(FIMMOSt)
10910 } else {
10911 // Incoming value: jbuf
10912 // ldr r1, LCPI1_1
10913 // add r1, pc, r1
10914 // str r1, [$jbuf, #+4] ; &jbuf[1]
10915 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10916 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10918 .addImm(0)
10919 .addMemOperand(CPMMO)
10921 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10922 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10923 .addReg(NewVReg1, RegState::Kill)
10924 .addImm(PCLabelId)
10926 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10927 .addReg(NewVReg2, RegState::Kill)
10928 .addFrameIndex(FI)
10929 .addImm(36) // &jbuf[1] :: pc
10930 .addMemOperand(FIMMOSt)
10932 }
10933}
10934
10935void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10936 MachineBasicBlock *MBB) const {
10937 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10938 DebugLoc dl = MI.getDebugLoc();
10939 MachineFunction *MF = MBB->getParent();
10941 MachineFrameInfo &MFI = MF->getFrameInfo();
10942 int FI = MFI.getFunctionContextIndex();
10943
10944 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10945 : &ARM::GPRnopcRegClass;
10946
10947 // Get a mapping of the call site numbers to all of the landing pads they're
10948 // associated with.
10950 unsigned MaxCSNum = 0;
10951 for (MachineBasicBlock &BB : *MF) {
10952 if (!BB.isEHPad())
10953 continue;
10954
10955 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10956 // pad.
10957 for (MachineInstr &II : BB) {
10958 if (!II.isEHLabel())
10959 continue;
10960
10961 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10962 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10963
10964 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10965 for (unsigned Idx : CallSiteIdxs) {
10966 CallSiteNumToLPad[Idx].push_back(&BB);
10967 MaxCSNum = std::max(MaxCSNum, Idx);
10968 }
10969 break;
10970 }
10971 }
10972
10973 // Get an ordered list of the machine basic blocks for the jump table.
10974 std::vector<MachineBasicBlock*> LPadList;
10976 LPadList.reserve(CallSiteNumToLPad.size());
10977 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10978 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10979 for (MachineBasicBlock *MBB : MBBList) {
10980 LPadList.push_back(MBB);
10981 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10982 }
10983 }
10984
10985 assert(!LPadList.empty() &&
10986 "No landing pad destinations for the dispatch jump table!");
10987
10988 // Create the jump table and associated information.
10990 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10991 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10992
10993 // Create the MBBs for the dispatch code.
10994
10995 // Shove the dispatch's address into the return slot in the function context.
10996 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10997 DispatchBB->setIsEHPad();
10998
10999 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11000 unsigned trap_opcode;
11001 if (Subtarget->isThumb())
11002 trap_opcode = ARM::tTRAP;
11003 else
11004 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
11005
11006 BuildMI(TrapBB, dl, TII->get(trap_opcode));
11007 DispatchBB->addSuccessor(TrapBB);
11008
11009 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11010 DispatchBB->addSuccessor(DispContBB);
11011
11012 // Insert the MBBs.
11013 MF->insert(MF->end(), DispatchBB);
11014 MF->insert(MF->end(), DispContBB);
11015 MF->insert(MF->end(), TrapBB);
11016
11017 // Insert code into the entry block that creates and registers the function
11018 // context.
11019 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
11020
11021 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11024
11026 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11027
11028 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
11029 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11030
11031 // Add a register mask with no preserved registers. This results in all
11032 // registers being marked as clobbered. This can't work if the dispatch block
11033 // is in a Thumb1 function and is linked with ARM code which uses the FP
11034 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11036
11037 bool IsPositionIndependent = isPositionIndependent();
11038 unsigned NumLPads = LPadList.size();
11039 if (Subtarget->isThumb2()) {
11040 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11041 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11042 .addFrameIndex(FI)
11043 .addImm(4)
11044 .addMemOperand(FIMMOLd)
11046
11047 if (NumLPads < 256) {
11048 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11049 .addReg(NewVReg1)
11050 .addImm(LPadList.size())
11052 } else {
11053 Register VReg1 = MRI->createVirtualRegister(TRC);
11054 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11055 .addImm(NumLPads & 0xFFFF)
11057
11058 unsigned VReg2 = VReg1;
11059 if ((NumLPads & 0xFFFF0000) != 0) {
11060 VReg2 = MRI->createVirtualRegister(TRC);
11061 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11062 .addReg(VReg1)
11063 .addImm(NumLPads >> 16)
11065 }
11066
11067 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11068 .addReg(NewVReg1)
11069 .addReg(VReg2)
11071 }
11072
11073 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11074 .addMBB(TrapBB)
11076 .addReg(ARM::CPSR);
11077
11078 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11079 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11080 .addJumpTableIndex(MJTI)
11082
11083 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11084 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11085 .addReg(NewVReg3, RegState::Kill)
11086 .addReg(NewVReg1)
11089 .add(condCodeOp());
11090
11091 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11092 .addReg(NewVReg4, RegState::Kill)
11093 .addReg(NewVReg1)
11094 .addJumpTableIndex(MJTI);
11095 } else if (Subtarget->isThumb()) {
11096 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11097 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11098 .addFrameIndex(FI)
11099 .addImm(1)
11100 .addMemOperand(FIMMOLd)
11102
11103 if (NumLPads < 256) {
11104 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11105 .addReg(NewVReg1)
11106 .addImm(NumLPads)
11108 } else {
11109 MachineConstantPool *ConstantPool = MF->getConstantPool();
11110 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11111 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11112
11113 // MachineConstantPool wants an explicit alignment.
11114 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11115 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11116
11117 Register VReg1 = MRI->createVirtualRegister(TRC);
11118 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11119 .addReg(VReg1, RegState::Define)
11122 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11123 .addReg(NewVReg1)
11124 .addReg(VReg1)
11126 }
11127
11128 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11129 .addMBB(TrapBB)
11131 .addReg(ARM::CPSR);
11132
11133 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11134 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11135 .addReg(ARM::CPSR, RegState::Define)
11136 .addReg(NewVReg1)
11137 .addImm(2)
11139
11140 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11141 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11142 .addJumpTableIndex(MJTI)
11144
11145 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11146 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11147 .addReg(ARM::CPSR, RegState::Define)
11148 .addReg(NewVReg2, RegState::Kill)
11149 .addReg(NewVReg3)
11151
11152 MachineMemOperand *JTMMOLd =
11153 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11155
11156 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11157 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11158 .addReg(NewVReg4, RegState::Kill)
11159 .addImm(0)
11160 .addMemOperand(JTMMOLd)
11162
11163 unsigned NewVReg6 = NewVReg5;
11164 if (IsPositionIndependent) {
11165 NewVReg6 = MRI->createVirtualRegister(TRC);
11166 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11167 .addReg(ARM::CPSR, RegState::Define)
11168 .addReg(NewVReg5, RegState::Kill)
11169 .addReg(NewVReg3)
11171 }
11172
11173 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11174 .addReg(NewVReg6, RegState::Kill)
11175 .addJumpTableIndex(MJTI);
11176 } else {
11177 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11178 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11179 .addFrameIndex(FI)
11180 .addImm(4)
11181 .addMemOperand(FIMMOLd)
11183
11184 if (NumLPads < 256) {
11185 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11186 .addReg(NewVReg1)
11187 .addImm(NumLPads)
11189 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11190 Register VReg1 = MRI->createVirtualRegister(TRC);
11191 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11192 .addImm(NumLPads & 0xFFFF)
11194
11195 unsigned VReg2 = VReg1;
11196 if ((NumLPads & 0xFFFF0000) != 0) {
11197 VReg2 = MRI->createVirtualRegister(TRC);
11198 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11199 .addReg(VReg1)
11200 .addImm(NumLPads >> 16)
11202 }
11203
11204 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11205 .addReg(NewVReg1)
11206 .addReg(VReg2)
11208 } else {
11209 MachineConstantPool *ConstantPool = MF->getConstantPool();
11210 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11211 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11212
11213 // MachineConstantPool wants an explicit alignment.
11214 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11215 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11216
11217 Register VReg1 = MRI->createVirtualRegister(TRC);
11218 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11219 .addReg(VReg1, RegState::Define)
11221 .addImm(0)
11223 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11224 .addReg(NewVReg1)
11225 .addReg(VReg1, RegState::Kill)
11227 }
11228
11229 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11230 .addMBB(TrapBB)
11232 .addReg(ARM::CPSR);
11233
11234 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11235 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11236 .addReg(NewVReg1)
11239 .add(condCodeOp());
11240 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11241 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11242 .addJumpTableIndex(MJTI)
11244
11245 MachineMemOperand *JTMMOLd =
11246 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11248 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11249 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11250 .addReg(NewVReg3, RegState::Kill)
11251 .addReg(NewVReg4)
11252 .addImm(0)
11253 .addMemOperand(JTMMOLd)
11255
11256 if (IsPositionIndependent) {
11257 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11258 .addReg(NewVReg5, RegState::Kill)
11259 .addReg(NewVReg4)
11260 .addJumpTableIndex(MJTI);
11261 } else {
11262 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11263 .addReg(NewVReg5, RegState::Kill)
11264 .addJumpTableIndex(MJTI);
11265 }
11266 }
11267
11268 // Add the jump table entries as successors to the MBB.
11270 for (MachineBasicBlock *CurMBB : LPadList) {
11271 if (SeenMBBs.insert(CurMBB).second)
11272 DispContBB->addSuccessor(CurMBB);
11273 }
11274
11275 // N.B. the order the invoke BBs are processed in doesn't matter here.
11276 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11278 for (MachineBasicBlock *BB : InvokeBBs) {
11279
11280 // Remove the landing pad successor from the invoke block and replace it
11281 // with the new dispatch block.
11282 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11283 while (!Successors.empty()) {
11284 MachineBasicBlock *SMBB = Successors.pop_back_val();
11285 if (SMBB->isEHPad()) {
11286 BB->removeSuccessor(SMBB);
11287 MBBLPads.push_back(SMBB);
11288 }
11289 }
11290
11291 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11292 BB->normalizeSuccProbs();
11293
11294 // Find the invoke call and mark all of the callee-saved registers as
11295 // 'implicit defined' so that they're spilled. This prevents code from
11296 // moving instructions to before the EH block, where they will never be
11297 // executed.
11299 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11300 if (!II->isCall()) continue;
11301
11304 OI = II->operands_begin(), OE = II->operands_end();
11305 OI != OE; ++OI) {
11306 if (!OI->isReg()) continue;
11307 DefRegs[OI->getReg()] = true;
11308 }
11309
11310 MachineInstrBuilder MIB(*MF, &*II);
11311
11312 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11313 unsigned Reg = SavedRegs[i];
11314 if (Subtarget->isThumb2() &&
11315 !ARM::tGPRRegClass.contains(Reg) &&
11316 !ARM::hGPRRegClass.contains(Reg))
11317 continue;
11318 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11319 continue;
11320 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11321 continue;
11322 if (!DefRegs[Reg])
11324 }
11325
11326 break;
11327 }
11328 }
11329
11330 // Mark all former landing pads as non-landing pads. The dispatch is the only
11331 // landing pad now.
11332 for (MachineBasicBlock *MBBLPad : MBBLPads)
11333 MBBLPad->setIsEHPad(false);
11334
11335 // The instruction is gone now.
11336 MI.eraseFromParent();
11337}
11338
11339static
11340MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11341 for (MachineBasicBlock *S : MBB->successors())
11342 if (S != Succ)
11343 return S;
11344 llvm_unreachable("Expecting a BB with two successors!");
11345}
11346
11347/// Return the load opcode for a given load size. If load size >= 8,
11348/// neon opcode will be returned.
11349static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11350 if (LdSize >= 8)
11351 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11352 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11353 if (IsThumb1)
11354 return LdSize == 4 ? ARM::tLDRi
11355 : LdSize == 2 ? ARM::tLDRHi
11356 : LdSize == 1 ? ARM::tLDRBi : 0;
11357 if (IsThumb2)
11358 return LdSize == 4 ? ARM::t2LDR_POST
11359 : LdSize == 2 ? ARM::t2LDRH_POST
11360 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11361 return LdSize == 4 ? ARM::LDR_POST_IMM
11362 : LdSize == 2 ? ARM::LDRH_POST
11363 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11364}
11365
11366/// Return the store opcode for a given store size. If store size >= 8,
11367/// neon opcode will be returned.
11368static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11369 if (StSize >= 8)
11370 return StSize == 16 ? ARM::VST1q32wb_fixed
11371 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11372 if (IsThumb1)
11373 return StSize == 4 ? ARM::tSTRi
11374 : StSize == 2 ? ARM::tSTRHi
11375 : StSize == 1 ? ARM::tSTRBi : 0;
11376 if (IsThumb2)
11377 return StSize == 4 ? ARM::t2STR_POST
11378 : StSize == 2 ? ARM::t2STRH_POST
11379 : StSize == 1 ? ARM::t2STRB_POST : 0;
11380 return StSize == 4 ? ARM::STR_POST_IMM
11381 : StSize == 2 ? ARM::STRH_POST
11382 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11383}
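// These post-increment forms let the struct-byval copy code below advance the
// address in the same instruction; Thumb1 has no post-indexed load/store, so
// emitPostLd/emitPostSt pair a plain tLDRi/tSTRi with a separate tADDi8 to
// bump the pointer.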
11384
11385/// Emit a post-increment load operation with given size. The instructions
11386/// will be added to BB at Pos.
11387static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11388 const TargetInstrInfo *TII, const DebugLoc &dl,
11389 unsigned LdSize, unsigned Data, unsigned AddrIn,
11390 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11391 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11392 assert(LdOpc != 0 && "Should have a load opcode");
11393 if (LdSize >= 8) {
11394 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11395 .addReg(AddrOut, RegState::Define)
11396 .addReg(AddrIn)
11397 .addImm(0)
11399 } else if (IsThumb1) {
11400 // load + update AddrIn
11401 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11402 .addReg(AddrIn)
11403 .addImm(0)
11405 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11406 .add(t1CondCodeOp())
11407 .addReg(AddrIn)
11408 .addImm(LdSize)
11410 } else if (IsThumb2) {
11411 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11412 .addReg(AddrOut, RegState::Define)
11413 .addReg(AddrIn)
11414 .addImm(LdSize)
11416 } else { // arm
11417 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11418 .addReg(AddrOut, RegState::Define)
11419 .addReg(AddrIn)
11420 .addReg(0)
11421 .addImm(LdSize)
11423 }
11424}
11425
11426/// Emit a post-increment store operation with given size. The instructions
11427/// will be added to BB at Pos.
11428static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11429 const TargetInstrInfo *TII, const DebugLoc &dl,
11430 unsigned StSize, unsigned Data, unsigned AddrIn,
11431 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11432 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11433 assert(StOpc != 0 && "Should have a store opcode");
11434 if (StSize >= 8) {
11435 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11436 .addReg(AddrIn)
11437 .addImm(0)
11438 .addReg(Data)
11440 } else if (IsThumb1) {
11441 // store + update AddrIn
11442 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11443 .addReg(Data)
11444 .addReg(AddrIn)
11445 .addImm(0)
11447 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11448 .add(t1CondCodeOp())
11449 .addReg(AddrIn)
11450 .addImm(StSize)
11452 } else if (IsThumb2) {
11453 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11454 .addReg(Data)
11455 .addReg(AddrIn)
11456 .addImm(StSize)
11458 } else { // arm
11459 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11460 .addReg(Data)
11461 .addReg(AddrIn)
11462 .addReg(0)
11463 .addImm(StSize)
11465 }
11466}
11467
11468MachineBasicBlock *
11469ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11470 MachineBasicBlock *BB) const {
11471 // This pseudo instruction has 3 operands: dst, src, size
11472 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11473 // Otherwise, we will generate unrolled scalar copies.
11474 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11475 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11477
11478 Register dest = MI.getOperand(0).getReg();
11479 Register src = MI.getOperand(1).getReg();
11480 unsigned SizeVal = MI.getOperand(2).getImm();
11481 unsigned Alignment = MI.getOperand(3).getImm();
11482 DebugLoc dl = MI.getDebugLoc();
11483
11484 MachineFunction *MF = BB->getParent();
11486 unsigned UnitSize = 0;
11487 const TargetRegisterClass *TRC = nullptr;
11488 const TargetRegisterClass *VecTRC = nullptr;
11489
11490 bool IsThumb1 = Subtarget->isThumb1Only();
11491 bool IsThumb2 = Subtarget->isThumb2();
11492 bool IsThumb = Subtarget->isThumb();
11493
11494 if (Alignment & 1) {
11495 UnitSize = 1;
11496 } else if (Alignment & 2) {
11497 UnitSize = 2;
11498 } else {
11499 // Check whether we can use NEON instructions.
11500 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11501 Subtarget->hasNEON()) {
11502 if ((Alignment % 16 == 0) && SizeVal >= 16)
11503 UnitSize = 16;
11504 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11505 UnitSize = 8;
11506 }
11507 // Can't use NEON instructions.
11508 if (UnitSize == 0)
11509 UnitSize = 4;
11510 }
11511
11512 // Select the correct opcode and register class for unit size load/store
11513 bool IsNeon = UnitSize >= 8;
11514 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11515 if (IsNeon)
11516 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11517 : UnitSize == 8 ? &ARM::DPRRegClass
11518 : nullptr;
11519
11520 unsigned BytesLeft = SizeVal % UnitSize;
11521 unsigned LoopSize = SizeVal - BytesLeft;
11522
11523 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11524 // Use LDR and STR to copy.
11525 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11526 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11527 unsigned srcIn = src;
11528 unsigned destIn = dest;
11529 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11530 Register srcOut = MRI.createVirtualRegister(TRC);
11531 Register destOut = MRI.createVirtualRegister(TRC);
11532 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11533 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11534 IsThumb1, IsThumb2);
11535 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11536 IsThumb1, IsThumb2);
11537 srcIn = srcOut;
11538 destIn = destOut;
11539 }
11540
11541 // Handle the leftover bytes with LDRB and STRB.
11542 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11543 // [destOut] = STRB_POST(scratch, destIn, 1)
11544 for (unsigned i = 0; i < BytesLeft; i++) {
11545 Register srcOut = MRI.createVirtualRegister(TRC);
11546 Register destOut = MRI.createVirtualRegister(TRC);
11547 Register scratch = MRI.createVirtualRegister(TRC);
11548 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11549 IsThumb1, IsThumb2);
11550 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11551 IsThumb1, IsThumb2);
11552 srcIn = srcOut;
11553 destIn = destOut;
11554 }
11555 MI.eraseFromParent(); // The instruction is gone now.
11556 return BB;
11557 }
11558
11559 // Expand the pseudo op to a loop.
11560 // thisMBB:
11561 // ...
11562 // movw varEnd, # --> with thumb2
11563 // movt varEnd, #
11564 // ldrcp varEnd, idx --> without thumb2
11565 // fallthrough --> loopMBB
11566 // loopMBB:
11567 // PHI varPhi, varEnd, varLoop
11568 // PHI srcPhi, src, srcLoop
11569 // PHI destPhi, dst, destLoop
11570 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11571 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11572 // subs varLoop, varPhi, #UnitSize
11573 // bne loopMBB
11574 // fallthrough --> exitMBB
11575 // exitMBB:
11576 // epilogue to handle left-over bytes
11577 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11578 // [destOut] = STRB_POST(scratch, destLoop, 1)
11579 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11580 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11581 MF->insert(It, loopMBB);
11582 MF->insert(It, exitMBB);
11583
11584 // Set the call frame size on entry to the new basic blocks.
11585 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11586 loopMBB->setCallFrameSize(CallFrameSize);
11587 exitMBB->setCallFrameSize(CallFrameSize);
11588
11589 // Transfer the remainder of BB and its successor edges to exitMBB.
11590 exitMBB->splice(exitMBB->begin(), BB,
11591 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11593
11594 // Load an immediate to varEnd.
11595 Register varEnd = MRI.createVirtualRegister(TRC);
11596 if (Subtarget->useMovt()) {
11597 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11598 varEnd)
11599 .addImm(LoopSize);
11600 } else if (Subtarget->genExecuteOnly()) {
11601 assert(IsThumb && "Non-thumb expected to have used movt");
11602 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11603 } else {
11605 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11606 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11607
11608 // MachineConstantPool wants an explicit alignment.
11609 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11610 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11611 MachineMemOperand *CPMMO =
11614
11615 if (IsThumb)
11616 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11617 .addReg(varEnd, RegState::Define)
11620 .addMemOperand(CPMMO);
11621 else
11622 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11623 .addReg(varEnd, RegState::Define)
11625 .addImm(0)
11627 .addMemOperand(CPMMO);
11628 }
11629 BB->addSuccessor(loopMBB);
11630
11631 // Generate the loop body:
11632 // varPhi = PHI(varLoop, varEnd)
11633 // srcPhi = PHI(srcLoop, src)
11634 // destPhi = PHI(destLoop, dst)
11635 MachineBasicBlock *entryBB = BB;
11636 BB = loopMBB;
11637 Register varLoop = MRI.createVirtualRegister(TRC);
11638 Register varPhi = MRI.createVirtualRegister(TRC);
11639 Register srcLoop = MRI.createVirtualRegister(TRC);
11640 Register srcPhi = MRI.createVirtualRegister(TRC);
11641 Register destLoop = MRI.createVirtualRegister(TRC);
11642 Register destPhi = MRI.createVirtualRegister(TRC);
11643
11644 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11645 .addReg(varLoop).addMBB(loopMBB)
11646 .addReg(varEnd).addMBB(entryBB);
11647 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11648 .addReg(srcLoop).addMBB(loopMBB)
11649 .addReg(src).addMBB(entryBB);
11650 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11651 .addReg(destLoop).addMBB(loopMBB)
11652 .addReg(dest).addMBB(entryBB);
11653
11654 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11655 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11656 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11657 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11658 IsThumb1, IsThumb2);
11659 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11660 IsThumb1, IsThumb2);
11661
11662 // Decrement loop variable by UnitSize.
11663 if (IsThumb1) {
11664 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11665 .add(t1CondCodeOp())
11666 .addReg(varPhi)
11667 .addImm(UnitSize)
11669 } else {
11671 BuildMI(*BB, BB->end(), dl,
11672 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11673 MIB.addReg(varPhi)
11674 .addImm(UnitSize)
11676 .add(condCodeOp());
11677 MIB->getOperand(5).setReg(ARM::CPSR);
11678 MIB->getOperand(5).setIsDef(true);
11679 }
11680 BuildMI(*BB, BB->end(), dl,
11681 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11682 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11683
11684 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11685 BB->addSuccessor(loopMBB);
11686 BB->addSuccessor(exitMBB);
11687
11688 // Add epilogue to handle BytesLeft.
11689 BB = exitMBB;
11690 auto StartOfExit = exitMBB->begin();
11691
11692 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11693 // [destOut] = STRB_POST(scratch, destLoop, 1)
11694 unsigned srcIn = srcLoop;
11695 unsigned destIn = destLoop;
11696 for (unsigned i = 0; i < BytesLeft; i++) {
11697 Register srcOut = MRI.createVirtualRegister(TRC);
11698 Register destOut = MRI.createVirtualRegister(TRC);
11699 Register scratch = MRI.createVirtualRegister(TRC);
11700 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11701 IsThumb1, IsThumb2);
11702 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11703 IsThumb1, IsThumb2);
11704 srcIn = srcOut;
11705 destIn = destOut;
11706 }
11707
11708 MI.eraseFromParent(); // The instruction is gone now.
11709 return BB;
11710}
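
// Illustrative sketch, not part of the original file: the byte-level copy that
// the EmitStructByval expansion above materializes. LoopSize bytes move in
// UnitSize-wide post-incremented chunks (LDR_POST/STR_POST, or VLD1/VST1 when
// NEON units are used) and the remaining BytesLeft bytes move one at a time.
// Function and parameter names are hypothetical; UnitSize is assumed non-zero,
// as guaranteed by the selection logic above.
static void copyByValSketch(unsigned char *Dst, const unsigned char *Src,
                            unsigned SizeVal, unsigned UnitSize) {
  unsigned BytesLeft = SizeVal % UnitSize;
  unsigned LoopSize = SizeVal - BytesLeft;
  // Main copy: one UnitSize-wide transfer per step (unrolled or looped form).
  for (unsigned I = 0; I < LoopSize; I += UnitSize)
    for (unsigned B = 0; B < UnitSize; ++B)
      Dst[I + B] = Src[I + B];
  // Epilogue: leftover bytes, matching the LDRB_POST/STRB_POST pairs.
  for (unsigned I = 0; I < BytesLeft; ++I)
    Dst[LoopSize + I] = Src[LoopSize + I];
}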
11711
11713ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11714 MachineBasicBlock *MBB) const {
11716 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11717 DebugLoc DL = MI.getDebugLoc();
11718
11719 assert(Subtarget->isTargetWindows() &&
11720 "__chkstk is only supported on Windows");
11721 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11722
11723 // __chkstk takes the number of words to allocate on the stack in R4, and
11724 // returns the stack adjustment in number of bytes in R4. This will not
11725 // clobber any other registers (other than the obvious lr).
11726 //
11727 // Although, technically, IP should be considered a register which may be
11728 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11729 // thumb-2 environment, so there is no interworking required. As a result, we
11730 // do not expect a veneer to be emitted by the linker, clobbering IP.
11731 //
11732 // Each module receives its own copy of __chkstk, so no import thunk is
11733 // required, again, ensuring that IP is not clobbered.
11734 //
11735 // Finally, although some linkers may theoretically provide a trampoline for
11736 // out of range calls (which is quite common due to a 32M range limitation of
11737 // branches for Thumb), we can generate the long-call version via
11738 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11739 // IP.
11740
11741 switch (TM.getCodeModel()) {
11742 case CodeModel::Tiny:
11743 llvm_unreachable("Tiny code model not available on ARM.");
11744 case CodeModel::Small:
11745 case CodeModel::Medium:
11746 case CodeModel::Kernel:
11747 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11749 .addExternalSymbol("__chkstk")
11752 .addReg(ARM::R12,
11754 .addReg(ARM::CPSR,
11756 break;
11757 case CodeModel::Large: {
11759 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11760
11761 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11762 .addExternalSymbol("__chkstk");
11765 .addReg(Reg, RegState::Kill)
11768 .addReg(ARM::R12,
11770 .addReg(ARM::CPSR,
11772 break;
11773 }
11774 }
11775
11776 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11777 .addReg(ARM::SP, RegState::Kill)
11778 .addReg(ARM::R4, RegState::Kill)
11781 .add(condCodeOp());
11782
11783 MI.eraseFromParent();
11784 return MBB;
11785}
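
// Illustrative sketch, not part of the original file: the stack-adjustment
// arithmetic the WIN__CHKSTK expansion above relies on. __chkstk receives the
// number of 4-byte words to allocate in R4 and returns the adjustment in bytes
// in R4, which the t2SUBrr above subtracts from SP. Names are hypothetical and
// the page-by-page stack probing done by the real __chkstk is omitted.
static uint32_t chkstkAdjustSketch(uint32_t SP, uint32_t Words) {
  uint32_t Bytes = Words * 4; // what __chkstk reports back in R4
  return SP - Bytes;          // sub sp, sp, r4
}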
11786
11788ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11789 MachineBasicBlock *MBB) const {
11790 DebugLoc DL = MI.getDebugLoc();
11791 MachineFunction *MF = MBB->getParent();
11792 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11793
11795 MF->insert(++MBB->getIterator(), ContBB);
11796 ContBB->splice(ContBB->begin(), MBB,
11797 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11799 MBB->addSuccessor(ContBB);
11800
11802 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11803 MF->push_back(TrapBB);
11804 MBB->addSuccessor(TrapBB);
11805
11806 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11807 .addReg(MI.getOperand(0).getReg())
11808 .addImm(0)
11810 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11811 .addMBB(TrapBB)
11813 .addReg(ARM::CPSR);
11814
11815 MI.eraseFromParent();
11816 return ContBB;
11817}
11818
11819// The CPSR operand of SelectItr might be missing a kill marker
11820// because there were multiple uses of CPSR, and ISel didn't know
11821// which to mark. Figure out whether SelectItr should have had a
11822// kill marker, and set it if it should. Returns the correct kill
11823// marker value.
11826 const TargetRegisterInfo* TRI) {
11827 // Scan forward through BB for a use/def of CPSR.
11828 MachineBasicBlock::iterator miI(std::next(SelectItr));
11829 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11830 const MachineInstr& mi = *miI;
11831 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11832 return false;
11833 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11834 break; // Should have kill-flag - update below.
11835 }
11836
11837 // If we hit the end of the block, check whether CPSR is live into a
11838 // successor.
11839 if (miI == BB->end()) {
11840 for (MachineBasicBlock *Succ : BB->successors())
11841 if (Succ->isLiveIn(ARM::CPSR))
11842 return false;
11843 }
11844
11845 // We found a def, or hit the end of the basic block and CPSR wasn't live
11846 // out. SelectMI should have a kill flag on CPSR.
11847 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11848 return true;
11849}
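
// Illustrative sketch, not part of the original file: the decision procedure
// used by checkAndUpdateCPSRKill above, restated over a plain container. A
// kill flag may be added only if no later instruction in the block reads the
// register before it is redefined, and the register is not live into any
// successor. The InstSketch type and field names are hypothetical.
struct InstSketch {
  bool ReadsCPSR;
  bool DefinesCPSR;
};
static bool canMarkKillSketch(const std::vector<InstSketch> &Later,
                              bool LiveIntoASuccessor) {
  for (const InstSketch &I : Later) {
    if (I.ReadsCPSR)
      return false;           // a later use exists; not a kill
    if (I.DefinesCPSR)
      return true;            // redefined first; the earlier use was the last
  }
  return !LiveIntoASuccessor; // fell off the block; kill only if not live-out
}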
11850
11851/// Adds logic in the loop entry MBB to calculate the loop iteration count and
11852/// adds t2WhileLoopSetup and t2WhileLoopStart to generate a WLS loop.
11854 MachineBasicBlock *TpLoopBody,
11855 MachineBasicBlock *TpExit, Register OpSizeReg,
11856 const TargetInstrInfo *TII, DebugLoc Dl,
11858 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
11859 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11860 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11861 .addUse(OpSizeReg)
11862 .addImm(15)
11864 .addReg(0);
11865
11866 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11867 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11868 .addUse(AddDestReg, RegState::Kill)
11869 .addImm(4)
11871 .addReg(0);
11872
11873 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11874 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11875 .addUse(LsrDestReg, RegState::Kill);
11876
11877 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11878 .addUse(TotalIterationsReg)
11879 .addMBB(TpExit);
11880
11881 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11882 .addMBB(TpLoopBody)
11884
11885 return TotalIterationsReg;
11886}
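
// Illustrative sketch, not part of the original file: the iteration-count
// computation genTPEntry emits above, i.e. ceil(n / 16) == (n + 15) >> 4 for
// the 16-byte vector width (the t2ADDri #15 followed by t2LSRri #4). The
// function name is hypothetical; the asserts below serve as worked examples.
static constexpr unsigned tpIterationCountSketch(unsigned SizeInBytes) {
  return (SizeInBytes + 15) >> 4; // add 15, then shift right by 4
}
static_assert(tpIterationCountSketch(0) == 0, "empty copy needs no iterations");
static_assert(tpIterationCountSketch(16) == 1, "one full vector block");
static_assert(tpIterationCountSketch(17) == 2, "partial tail adds an iteration");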
11887
11888/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11889/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11890/// loops.
11891static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11892 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11893 const TargetInstrInfo *TII, DebugLoc Dl,
11894 MachineRegisterInfo &MRI, Register OpSrcReg,
11895 Register OpDestReg, Register ElementCountReg,
11896 Register TotalIterationsReg, bool IsMemcpy) {
11897 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11898 // array, loop iteration counter, predication counter.
11899
11900 Register SrcPhiReg, CurrSrcReg;
11901 if (IsMemcpy) {
11902 // Current position in the src array
11903 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11904 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11905 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11906 .addUse(OpSrcReg)
11907 .addMBB(TpEntry)
11908 .addUse(CurrSrcReg)
11909 .addMBB(TpLoopBody);
11910 }
11911
11912 // Current position in the dest array
11913 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11914 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11915 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11916 .addUse(OpDestReg)
11917 .addMBB(TpEntry)
11918 .addUse(CurrDestReg)
11919 .addMBB(TpLoopBody);
11920
11921 // Current loop counter
11922 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11923 Register RemainingLoopIterationsReg =
11924 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11925 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11926 .addUse(TotalIterationsReg)
11927 .addMBB(TpEntry)
11928 .addUse(RemainingLoopIterationsReg)
11929 .addMBB(TpLoopBody);
11930
11931 // Predication counter
11932 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11933 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11934 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11935 .addUse(ElementCountReg)
11936 .addMBB(TpEntry)
11937 .addUse(RemainingElementsReg)
11938 .addMBB(TpLoopBody);
11939
11940 // Pass predication counter to VCTP
11941 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11942 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11943 .addUse(PredCounterPhiReg)
11945 .addReg(0)
11946 .addReg(0);
11947
11948 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11949 .addUse(PredCounterPhiReg)
11950 .addImm(16)
11952 .addReg(0);
11953
11954 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11955 Register SrcValueReg;
11956 if (IsMemcpy) {
11957 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11958 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11959 .addDef(CurrSrcReg)
11960 .addDef(SrcValueReg)
11961 .addReg(SrcPhiReg)
11962 .addImm(16)
11964 .addUse(VccrReg)
11965 .addReg(0);
11966 } else
11967 SrcValueReg = OpSrcReg;
11968
11969 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11970 .addDef(CurrDestReg)
11971 .addUse(SrcValueReg)
11972 .addReg(DestPhiReg)
11973 .addImm(16)
11975 .addUse(VccrReg)
11976 .addReg(0);
11977
11978 // Add the pseudoInstrs for decrementing the loop counter and marking the
11979 // end: t2DoLoopDec and t2DoLoopEnd.
11980 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11981 .addUse(LoopCounterPhiReg)
11982 .addImm(1);
11983
11984 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11985 .addUse(RemainingLoopIterationsReg)
11986 .addMBB(TpLoopBody);
11987
11988 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11989 .addMBB(TpExit)
11991}
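
// Illustrative sketch, not part of the original file: the element accounting
// of the tail-predicated loop built by genTPLoopBody above. Each iteration
// transfers up to 16 bytes; the MVE_VCTP8 predicate disables the excess lanes
// on the final, partial iteration, while the predication counter drops by 16
// and the loop counter by 1 (t2LoopDec/t2LoopEnd). Names are hypothetical and
// the memset path is omitted.
static void tpLoopSketch(unsigned char *Dst, const unsigned char *Src,
                         unsigned N) {
  unsigned Iterations = (N + 15) >> 4; // the count genTPEntry computes
  for (unsigned It = 0; It < Iterations; ++It) {
    unsigned Base = It * 16;
    unsigned Remaining = N - Base;                     // predication-counter PHI
    unsigned Active = Remaining < 16 ? Remaining : 16; // lanes VCTP8 enables
    for (unsigned L = 0; L < Active; ++L)              // predicated VLDRB/VSTRB
      Dst[Base + L] = Src[Base + L];
  }
}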
11992
11995 MachineBasicBlock *BB) const {
11996 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11997 DebugLoc dl = MI.getDebugLoc();
11998 bool isThumb2 = Subtarget->isThumb2();
11999 switch (MI.getOpcode()) {
12000 default: {
12001 MI.print(errs());
12002 llvm_unreachable("Unexpected instr type to insert");
12003 }
12004
12005 // Thumb1 post-indexed loads are really just single-register LDMs.
12006 case ARM::tLDR_postidx: {
12007 MachineOperand Def(MI.getOperand(1));
12008 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12009 .add(Def) // Rn_wb
12010 .add(MI.getOperand(2)) // Rn
12011 .add(MI.getOperand(3)) // PredImm
12012 .add(MI.getOperand(4)) // PredReg
12013 .add(MI.getOperand(0)) // Rt
12014 .cloneMemRefs(MI);
12015 MI.eraseFromParent();
12016 return BB;
12017 }
12018
12019 case ARM::MVE_MEMCPYLOOPINST:
12020 case ARM::MVE_MEMSETLOOPINST: {
12021
12022 // The transformation below expands the MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST
12023 // pseudo into a Tail Predicated (TP) loop. It adds the instructions to calculate
12024 // the iteration count = ceil(size_in_bytes / 16) in the TP entry block and
12025 // adds the relevant instructions in the TP loop body for generation of a
12026 // WLSTP loop.
12027
12028 // Below is relevant portion of the CFG after the transformation.
12029 // The Machine Basic Blocks are shown along with branch conditions (in
12030 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
12031 // portion of the CFG and may not necessarily be the entry/exit of the
12032 // function.
12033
12034 // (Relevant) CFG after transformation:
12035 //                TP entry MBB
12036 //                     |
12037 //           |-----------------|
12038 //        (n <= 0)          (n > 0)
12039 //           |                 |
12040 //           |        TP loop Body MBB<--|
12041 //           |                 |         |
12042 //            \                |_________|
12043 //             \               /
12044 //              TP exit MBB
12045
12046 MachineFunction *MF = BB->getParent();
12047 MachineFunctionProperties &Properties = MF->getProperties();
12049
12050 Register OpDestReg = MI.getOperand(0).getReg();
12051 Register OpSrcReg = MI.getOperand(1).getReg();
12052 Register OpSizeReg = MI.getOperand(2).getReg();
12053
12054 // Allocate the required MBBs and add to parent function.
12055 MachineBasicBlock *TpEntry = BB;
12056 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12057 MachineBasicBlock *TpExit;
12058
12059 MF->push_back(TpLoopBody);
12060
12061 // If any instructions are present in the current block after
12062 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12063 // move the instructions into the newly created exit block. If there are no
12064 // instructions, add an explicit branch to the FallThrough block and then
12065 // split.
12066 //
12067 // The split is required for two reasons:
12068 // 1) A terminator (t2WhileLoopStart) will be placed at that site.
12069 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12070 // need to be updated. splitAt() already handles this.
12071 TpExit = BB->splitAt(MI, false);
12072 if (TpExit == BB) {
12073 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12074 "block containing memcpy/memset Pseudo");
12075 TpExit = BB->getFallThrough();
12076 BuildMI(BB, dl, TII->get(ARM::t2B))
12077 .addMBB(TpExit)
12079 TpExit = BB->splitAt(MI, false);
12080 }
12081
12082 // Add logic for iteration count
12083 Register TotalIterationsReg =
12084 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12085
12086 // Add the vectorized (and predicated) loads/store instructions
12087 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12088 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12089 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12090
12091 // Required to avoid conflict with the MachineVerifier during testing.
12093
12094 // Connect the blocks
12095 TpEntry->addSuccessor(TpLoopBody);
12096 TpLoopBody->addSuccessor(TpLoopBody);
12097 TpLoopBody->addSuccessor(TpExit);
12098
12099 // Reorder for a more natural layout
12100 TpLoopBody->moveAfter(TpEntry);
12101 TpExit->moveAfter(TpLoopBody);
12102
12103 // Finally, remove the memcpy Pseudo Instruction
12104 MI.eraseFromParent();
12105
12106 // Return the exit block as it may contain other instructions requiring a
12107 // custom inserter
12108 return TpExit;
12109 }
12110
12111 // The Thumb2 pre-indexed stores have the same MI operands; they just
12112 // define them differently in the .td files from the isel patterns, so
12113 // they need pseudos.
12114 case ARM::t2STR_preidx:
12115 MI.setDesc(TII->get(ARM::t2STR_PRE));
12116 return BB;
12117 case ARM::t2STRB_preidx:
12118 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12119 return BB;
12120 case ARM::t2STRH_preidx:
12121 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12122 return BB;
12123
12124 case ARM::STRi_preidx:
12125 case ARM::STRBi_preidx: {
12126 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12127 : ARM::STRB_PRE_IMM;
12128 // Decode the offset.
12129 unsigned Offset = MI.getOperand(4).getImm();
12130 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12132 if (isSub)
12133 Offset = -Offset;
12134
12135 MachineMemOperand *MMO = *MI.memoperands_begin();
12136 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12137 .add(MI.getOperand(0)) // Rn_wb
12138 .add(MI.getOperand(1)) // Rt
12139 .add(MI.getOperand(2)) // Rn
12140 .addImm(Offset) // offset (skip GPR==zero_reg)
12141 .add(MI.getOperand(5)) // pred
12142 .add(MI.getOperand(6))
12143 .addMemOperand(MMO);
12144 MI.eraseFromParent();
12145 return BB;
12146 }
12147 case ARM::STRr_preidx:
12148 case ARM::STRBr_preidx:
12149 case ARM::STRH_preidx: {
12150 unsigned NewOpc;
12151 switch (MI.getOpcode()) {
12152 default: llvm_unreachable("unexpected opcode!");
12153 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12154 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12155 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12156 }
12157 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12158 for (const MachineOperand &MO : MI.operands())
12159 MIB.add(MO);
12160 MI.eraseFromParent();
12161 return BB;
12162 }
12163
12164 case ARM::tMOVCCr_pseudo: {
12165 // To "insert" a SELECT_CC instruction, we actually have to insert the
12166 // diamond control-flow pattern. The incoming instruction knows the
12167 // destination vreg to set, the condition code register to branch on, the
12168 // true/false values to select between, and a branch opcode to use.
12169 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12171
12172 // thisMBB:
12173 // ...
12174 // TrueVal = ...
12175 // cmpTY ccX, r1, r2
12176 // bCC copy1MBB
12177 // fallthrough --> copy0MBB
12178 MachineBasicBlock *thisMBB = BB;
12179 MachineFunction *F = BB->getParent();
12180 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12181 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12182 F->insert(It, copy0MBB);
12183 F->insert(It, sinkMBB);
12184
12185 // Set the call frame size on entry to the new basic blocks.
12186 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12187 copy0MBB->setCallFrameSize(CallFrameSize);
12188 sinkMBB->setCallFrameSize(CallFrameSize);
12189
12190 // Check whether CPSR is live past the tMOVCCr_pseudo.
12191 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12192 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12193 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12194 copy0MBB->addLiveIn(ARM::CPSR);
12195 sinkMBB->addLiveIn(ARM::CPSR);
12196 }
12197
12198 // Transfer the remainder of BB and its successor edges to sinkMBB.
12199 sinkMBB->splice(sinkMBB->begin(), BB,
12200 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12202
12203 BB->addSuccessor(copy0MBB);
12204 BB->addSuccessor(sinkMBB);
12205
12206 BuildMI(BB, dl, TII->get(ARM::tBcc))
12207 .addMBB(sinkMBB)
12208 .addImm(MI.getOperand(3).getImm())
12209 .addReg(MI.getOperand(4).getReg());
12210
12211 // copy0MBB:
12212 // %FalseValue = ...
12213 // # fallthrough to sinkMBB
12214 BB = copy0MBB;
12215
12216 // Update machine-CFG edges
12217 BB->addSuccessor(sinkMBB);
12218
12219 // sinkMBB:
12220 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12221 // ...
12222 BB = sinkMBB;
12223 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12224 .addReg(MI.getOperand(1).getReg())
12225 .addMBB(copy0MBB)
12226 .addReg(MI.getOperand(2).getReg())
12227 .addMBB(thisMBB);
12228
12229 MI.eraseFromParent(); // The pseudo instruction is gone now.
12230 return BB;
12231 }
12232
12233 case ARM::BCCi64:
12234 case ARM::BCCZi64: {
12235 // If there is an unconditional branch to the other successor, remove it.
12236 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12237
12238 // Compare both parts that make up the double comparison separately for
12239 // equality.
12240 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12241
12242 Register LHS1 = MI.getOperand(1).getReg();
12243 Register LHS2 = MI.getOperand(2).getReg();
12244 if (RHSisZero) {
12245 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12246 .addReg(LHS1)
12247 .addImm(0)
12249 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12250 .addReg(LHS2).addImm(0)
12251 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12252 } else {
12253 Register RHS1 = MI.getOperand(3).getReg();
12254 Register RHS2 = MI.getOperand(4).getReg();
12255 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12256 .addReg(LHS1)
12257 .addReg(RHS1)
12259 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12260 .addReg(LHS2).addReg(RHS2)
12261 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12262 }
12263
12264 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12265 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12266 if (MI.getOperand(0).getImm() == ARMCC::NE)
12267 std::swap(destMBB, exitMBB);
12268
12269 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12270 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12271 if (isThumb2)
12272 BuildMI(BB, dl, TII->get(ARM::t2B))
12273 .addMBB(exitMBB)
12275 else
12276 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12277
12278 MI.eraseFromParent(); // The pseudo instruction is gone now.
12279 return BB;
12280 }
12281
12282 case ARM::Int_eh_sjlj_setjmp:
12283 case ARM::Int_eh_sjlj_setjmp_nofp:
12284 case ARM::tInt_eh_sjlj_setjmp:
12285 case ARM::t2Int_eh_sjlj_setjmp:
12286 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12287 return BB;
12288
12289 case ARM::Int_eh_sjlj_setup_dispatch:
12290 EmitSjLjDispatchBlock(MI, BB);
12291 return BB;
12292
12293 case ARM::ABS:
12294 case ARM::t2ABS: {
12295 // To insert an ABS instruction, we have to insert the
12296 // diamond control-flow pattern. The incoming instruction knows the
12297 // source vreg to test against 0, the destination vreg to set,
12298 // the condition code register to branch on, the
12299 // true/false values to select between, and a branch opcode to use.
12300 // It transforms
12301 // V1 = ABS V0
12302 // into
12303 // V2 = MOVS V0
12304 // BCC (branch to SinkBB if V0 >= 0)
12305 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12306 // SinkBB: V1 = PHI(V2, V3)
12307 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12309 MachineFunction *Fn = BB->getParent();
12310 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12311 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12312 Fn->insert(BBI, RSBBB);
12313 Fn->insert(BBI, SinkBB);
12314
12315 Register ABSSrcReg = MI.getOperand(1).getReg();
12316 Register ABSDstReg = MI.getOperand(0).getReg();
12317 bool ABSSrcKIll = MI.getOperand(1).isKill();
12318 bool isThumb2 = Subtarget->isThumb2();
12320 // In Thumb mode S must not be specified if the source register is the SP or
12321 // PC and if the destination register is the SP, so restrict the register class.
12322 Register NewRsbDstReg = MRI.createVirtualRegister(
12323 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12324
12325 // Transfer the remainder of BB and its successor edges to sinkMBB.
12326 SinkBB->splice(SinkBB->begin(), BB,
12327 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12329
12330 BB->addSuccessor(RSBBB);
12331 BB->addSuccessor(SinkBB);
12332
12333 // fall through to SinkMBB
12334 RSBBB->addSuccessor(SinkBB);
12335
12336 // insert a cmp at the end of BB
12337 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12338 .addReg(ABSSrcReg)
12339 .addImm(0)
12341
12342 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12343 BuildMI(BB, dl,
12344 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12346
12347 // insert rsbri in RSBBB
12348 // Note: BCC and rsbri will be converted into predicated rsbmi
12349 // by if-conversion pass
12350 BuildMI(*RSBBB, RSBBB->begin(), dl,
12351 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12352 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12353 .addImm(0)
12355 .add(condCodeOp());
12356
12357 // insert PHI in SinkBB,
12358 // reuse ABSDstReg to not change uses of ABS instruction
12359 BuildMI(*SinkBB, SinkBB->begin(), dl,
12360 TII->get(ARM::PHI), ABSDstReg)
12361 .addReg(NewRsbDstReg).addMBB(RSBBB)
12362 .addReg(ABSSrcReg).addMBB(BB);
12363
12364 // remove ABS instruction
12365 MI.eraseFromParent();
12366
12367 // return last added BB
12368 return SinkBB;
12369 }
12370 case ARM::COPY_STRUCT_BYVAL_I32:
12371 ++NumLoopByVals;
12372 return EmitStructByval(MI, BB);
12373 case ARM::WIN__CHKSTK:
12374 return EmitLowered__chkstk(MI, BB);
12375 case ARM::WIN__DBZCHK:
12376 return EmitLowered__dbzchk(MI, BB);
12377 }
12378}
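
// Illustrative sketch, not part of the original file: the control flow the
// ARM::ABS / t2ABS expansion above creates. The compare against zero stands in
// for the CMP that sets CPSR, the early return models the conditional branch
// around RSBBB, and the negation models the RSBri whose result the PHI in
// SinkBB merges with the original value. The name is hypothetical.
static int absExpansionSketch(int V0) {
  if (V0 >= 0)  // Bcc to SinkBB when the ARMCC::MI condition does not hold
    return V0;  // value reaches the PHI from the fall-through edge
  // RSBri in RSBBB: V3 = 0 - V2 (computed in unsigned so INT_MIN wraps, as the
  // hardware RSB does, instead of invoking signed-overflow UB).
  return static_cast<int>(0u - static_cast<unsigned>(V0));
}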
12379
12380/// Attaches vregs to MEMCPY that it will use as scratch registers
12381/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12382/// instead of as a custom inserter because we need the use list from the SDNode.
12383static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12384 MachineInstr &MI, const SDNode *Node) {
12385 bool isThumb1 = Subtarget->isThumb1Only();
12386
12387 DebugLoc DL = MI.getDebugLoc();
12388 MachineFunction *MF = MI.getParent()->getParent();
12390 MachineInstrBuilder MIB(*MF, MI);
12391
12392 // If the new dst/src is unused, mark it as dead.
12393 if (!Node->hasAnyUseOfValue(0)) {
12394 MI.getOperand(0).setIsDead(true);
12395 }
12396 if (!Node->hasAnyUseOfValue(1)) {
12397 MI.getOperand(1).setIsDead(true);
12398 }
12399
12400 // The MEMCPY both defines and kills the scratch registers.
12401 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12402 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12403 : &ARM::GPRRegClass);
12405 }
12406}
12407
12409 SDNode *Node) const {
12410 if (MI.getOpcode() == ARM::MEMCPY) {
12411 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12412 return;
12413 }
12414
12415 const MCInstrDesc *MCID = &MI.getDesc();
12416 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12417 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12418 // operand is still set to noreg. If needed, set the optional operand's
12419 // register to CPSR, and remove the redundant implicit def.
12420 //
12421 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12422
12423 // Rename pseudo opcodes.
12424 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12425 unsigned ccOutIdx;
12426 if (NewOpc) {
12427 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12428 MCID = &TII->get(NewOpc);
12429
12430 assert(MCID->getNumOperands() ==
12431 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12432 && "converted opcode should be the same except for cc_out"
12433 " (and, on Thumb1, pred)");
12434
12435 MI.setDesc(*MCID);
12436
12437 // Add the optional cc_out operand
12438 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12439
12440 // On Thumb1, move all input operands to the end, then add the predicate
12441 if (Subtarget->isThumb1Only()) {
12442 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12443 MI.addOperand(MI.getOperand(1));
12444 MI.removeOperand(1);
12445 }
12446
12447 // Restore the ties
12448 for (unsigned i = MI.getNumOperands(); i--;) {
12449 const MachineOperand& op = MI.getOperand(i);
12450 if (op.isReg() && op.isUse()) {
12451 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12452 if (DefIdx != -1)
12453 MI.tieOperands(DefIdx, i);
12454 }
12455 }
12456
12458 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12459 ccOutIdx = 1;
12460 } else
12461 ccOutIdx = MCID->getNumOperands() - 1;
12462 } else
12463 ccOutIdx = MCID->getNumOperands() - 1;
12464
12465 // Any ARM instruction that sets the 's' bit should specify an optional
12466 // "cc_out" operand in the last operand position.
12467 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12468 assert(!NewOpc && "Optional cc_out operand required");
12469 return;
12470 }
12471 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12472 // since we already have an optional CPSR def.
12473 bool definesCPSR = false;
12474 bool deadCPSR = false;
12475 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12476 ++i) {
12477 const MachineOperand &MO = MI.getOperand(i);
12478 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12479 definesCPSR = true;
12480 if (MO.isDead())
12481 deadCPSR = true;
12482 MI.removeOperand(i);
12483 break;
12484 }
12485 }
12486 if (!definesCPSR) {
12487 assert(!NewOpc && "Optional cc_out operand required");
12488 return;
12489 }
12490 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12491 if (deadCPSR) {
12492 assert(!MI.getOperand(ccOutIdx).getReg() &&
12493 "expect uninitialized optional cc_out operand");
12494 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12495 if (!Subtarget->isThumb1Only())
12496 return;
12497 }
12498
12499 // If this instruction was defined with an optional CPSR def and its dag node
12500 // had a live implicit CPSR def, then activate the optional CPSR def.
12501 MachineOperand &MO = MI.getOperand(ccOutIdx);
12502 MO.setReg(ARM::CPSR);
12503 MO.setIsDef(true);
12504}
12505
12506//===----------------------------------------------------------------------===//
12507// ARM Optimization Hooks
12508//===----------------------------------------------------------------------===//
12509
12510// Helper function that checks if N is a null or all ones constant.
12511static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12513}
12514
12515// Return true if N is conditionally 0 or all ones.
12516// Detects these expressions where cc is an i1 value:
12517//
12518// (select cc 0, y) [AllOnes=0]
12519// (select cc y, 0) [AllOnes=0]
12520// (zext cc) [AllOnes=0]
12521// (sext cc) [AllOnes=0/1]
12522// (select cc -1, y) [AllOnes=1]
12523// (select cc y, -1) [AllOnes=1]
12524//
12525// Invert is set when N is the null/all ones constant when CC is false.
12526// OtherOp is set to the alternative value of N.
12528 SDValue &CC, bool &Invert,
12529 SDValue &OtherOp,
12530 SelectionDAG &DAG) {
12531 switch (N->getOpcode()) {
12532 default: return false;
12533 case ISD::SELECT: {
12534 CC = N->getOperand(0);
12535 SDValue N1 = N->getOperand(1);
12536 SDValue N2 = N->getOperand(2);
12537 if (isZeroOrAllOnes(N1, AllOnes)) {
12538 Invert = false;
12539 OtherOp = N2;
12540 return true;
12541 }
12542 if (isZeroOrAllOnes(N2, AllOnes)) {
12543 Invert = true;
12544 OtherOp = N1;
12545 return true;
12546 }
12547 return false;
12548 }
12549 case ISD::ZERO_EXTEND:
12550 // (zext cc) can never be the all ones value.
12551 if (AllOnes)
12552 return false;
12553 [[fallthrough]];
12554 case ISD::SIGN_EXTEND: {
12555 SDLoc dl(N);
12556 EVT VT = N->getValueType(0);
12557 CC = N->getOperand(0);
12558 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12559 return false;
12560 Invert = !AllOnes;
12561 if (AllOnes)
12562 // When looking for an AllOnes constant, N is an sext, and the 'other'
12563 // value is 0.
12564 OtherOp = DAG.getConstant(0, dl, VT);
12565 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12566 // When looking for a 0 constant, N can be zext or sext.
12567 OtherOp = DAG.getConstant(1, dl, VT);
12568 else
12569 OtherOp = DAG.getAllOnesConstant(dl, VT);
12570 return true;
12571 }
12572 }
12573}
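
// Illustrative sketch, not part of the original file: the value patterns that
// isConditionalZeroOrAllOnes above recognizes, written out at the C level. A
// zero-extended i1 is 0 or 1, a sign-extended i1 is 0 or -1 (all ones), and
// the select forms pick between one of those constants and an arbitrary value.
// The function name is hypothetical.
static void conditionalConstantsSketch(bool CC, int Y) {
  int Zext = static_cast<int>(CC); // (zext cc): 0 or 1
  int Sext = CC ? -1 : 0;          // (sext cc): 0 or all ones
  int SelZero = CC ? 0 : Y;        // (select cc, 0, y)   [AllOnes=0]
  int SelOnes = CC ? -1 : Y;       // (select cc, -1, y)  [AllOnes=1]
  assert(Sext == -Zext && "sext of an i1 is the negated zext");
  (void)SelZero;
  (void)SelOnes;
}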
12574
12575// Combine a constant select operand into its use:
12576//
12577 // (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
12578 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
12579 // (and (select cc, -1, c), x) -> (select cc, x, (and x, c)) [AllOnes=1]
12580 // (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
12581 // (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
12582//
12583// The transform is rejected if the select doesn't have a constant operand that
12584// is null, or all ones when AllOnes is set.
12585//
12586// Also recognize sext/zext from i1:
12587//
12588// (add (zext cc), x) -> (select cc (add x, 1), x)
12589// (add (sext cc), x) -> (select cc (add x, -1), x)
12590//
12591// These transformations eventually create predicated instructions.
12592//
12593// @param N The node to transform.
12594// @param Slct The N operand that is a select.
12595// @param OtherOp The other N operand (x above).
12596// @param DCI Context.
12597// @param AllOnes Require the select constant to be all ones instead of null.
12598// @returns The new node, or SDValue() on failure.
12599static
12602 bool AllOnes = false) {
12603 SelectionDAG &DAG = DCI.DAG;
12604 EVT VT = N->getValueType(0);
12605 SDValue NonConstantVal;
12606 SDValue CCOp;
12607 bool SwapSelectOps;
12608 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12609 NonConstantVal, DAG))
12610 return SDValue();
12611
12612 // Slct is now known to be the desired identity constant when CC is true.
12613 SDValue TrueVal = OtherOp;
12614 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12615 OtherOp, NonConstantVal);
12616 // Unless SwapSelectOps says CC should be false.
12617 if (SwapSelectOps)
12618 std::swap(TrueVal, FalseVal);
12619
12620 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12621 CCOp, TrueVal, FalseVal);
12622}
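
// Illustrative sketch, not part of the original file: the identity behind
// combineSelectAndUse above, shown for the add/zero case. When one operand of
// the ADD is a select whose chosen constant is the identity 0, the ADD can be
// folded into the other arm of the select; Invert/SwapSelectOps covers the
// mirrored case. The function name is hypothetical.
static int addOfSelectSketch(bool CC, int C, int X) {
  int Folded = CC ? X : (X + C);      // (select cc, x, (add x, c))
  assert(Folded == (CC ? 0 : C) + X); // equals (add (select cc, 0, c), x)
  return Folded;
}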
12623
12624// Attempt combineSelectAndUse on each operand of a commutative operator N.
12625static
12628 SDValue N0 = N->getOperand(0);
12629 SDValue N1 = N->getOperand(1);
12630 if (N0.getNode()->hasOneUse())
12631 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12632 return Result;
12633 if (N1.getNode()->hasOneUse())
12634 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12635 return Result;
12636 return SDValue();
12637}
12638
12640 // VUZP shuffle node.
12641 if (N->getOpcode() == ARMISD::VUZP)
12642 return true;
12643
12644 // "VUZP" on i32 is an alias for VTRN.
12645 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12646 return true;
12647
12648 return false;
12649}
12650
12653 const ARMSubtarget *Subtarget) {
12654 // Look for ADD(VUZP.0, VUZP.1).
12655 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12656 N0 == N1)
12657 return SDValue();
12658
12659 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12660 if (!N->getValueType(0).is64BitVector())
12661 return SDValue();
12662
12663 // Generate vpadd.
12664 SelectionDAG &DAG = DCI.DAG;
12665 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12666 SDLoc dl(N);
12667 SDNode *Unzip = N0.getNode();
12668 EVT VT = N->getValueType(0);
12669
12671 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12672 TLI.getPointerTy(DAG.getDataLayout())));
12673 Ops.push_back(Unzip->getOperand(0));
12674 Ops.push_back(Unzip->getOperand(1));
12675
12676 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12677}
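
// Illustrative sketch, not part of the original file: why ADD(VUZP.0, VUZP.1)
// is a pairwise add. VUZP de-interleaves a vector into its even-indexed and
// odd-indexed elements, so adding the two results lane by lane sums adjacent
// element pairs, which is what NEON vpadd computes. The element type and names
// here are an arbitrary example.
static void vpaddSketch(const int *In, int *Out, unsigned PairCount) {
  for (unsigned I = 0; I < PairCount; ++I) {
    int Even = In[2 * I];     // lane I of VUZP.0
    int Odd = In[2 * I + 1];  // lane I of VUZP.1
    Out[I] = Even + Odd;      // lane I of vpadd
  }
}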
12678
12681 const ARMSubtarget *Subtarget) {
12682 // Check for two extended operands.
12683 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12684 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12685 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12686 N1.getOpcode() == ISD::ZERO_EXTEND))
12687 return SDValue();
12688
12689 SDValue N00 = N0.getOperand(0);
12690 SDValue N10 = N1.getOperand(0);
12691
12692 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12693 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12694 N00 == N10)
12695 return SDValue();
12696
12697 // We only recognize Q register paddl here; this can't be reached until
12698 // after type legalization.
12699 if (!N00.getValueType().is64BitVector() ||
12701 return SDValue();
12702
12703 // Generate vpaddl.
12704 SelectionDAG &DAG = DCI.DAG;
12705 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12706 SDLoc dl(N);
12707 EVT VT = N->getValueType(0);
12708
12710 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12711 unsigned Opcode;
12712 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12713 Opcode = Intrinsic::arm_neon_vpaddls;
12714 else
12715 Opcode = Intrinsic::arm_neon_vpaddlu;
12716 Ops.push_back(DAG.getConstant(Opcode, dl,
12717 TLI.getPointerTy(DAG.getDataLayout())));
12718 EVT ElemTy = N00.getValueType().getVectorElementType();
12719 unsigned NumElts = VT.getVectorNumElements();
12720 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12721 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12722 N00.getOperand(0), N00.getOperand(1));
12723 Ops.push_back(Concat);
12724
12725 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12726}
12727
12728// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12729// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12730// much easier to match.
12731static SDValue
12734 const ARMSubtarget *Subtarget) {
12735 // Only perform the optimization after legalization, and only if NEON is
12736 // available. We also expect both operands to be BUILD_VECTORs.
12737 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12738 || N0.getOpcode() != ISD::BUILD_VECTOR
12739 || N1.getOpcode() != ISD::BUILD_VECTOR)
12740 return SDValue();
12741
12742 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12743 EVT VT = N->getValueType(0);
12744 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12745 return SDValue();
12746
12747 // Check that the vector operands are of the right form.
12748 // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR operands,
12749 // where N is the size of the formed vector.
12750 // Each EXTRACT_VECTOR should have the same input vector and an odd or even
12751 // index such that we have a pairwise add pattern.
12752
12753 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12755 return SDValue();
12756 SDValue Vec = N0->getOperand(0)->getOperand(0);
12757 SDNode *V = Vec.getNode();
12758 unsigned nextIndex = 0;
12759
12760 // For the operands of the ADD, which are BUILD_VECTORs,
12761 // check whether each of their operands is an EXTRACT_VECTOR with
12762 // the same vector and an appropriate index.
12763 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12766
12767 SDValue ExtVec0 = N0->getOperand(i);
12768 SDValue ExtVec1 = N1->getOperand(i);
12769
12770 // The first operand is the vector; verify it's the same.
12771 if (V != ExtVec0->getOperand(0).getNode() ||
12772 V != ExtVec1->getOperand(0).getNode())
12773 return SDValue();
12774
12775 // The second is the constant; verify it's correct.
12776 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12777 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12778
12779 // For the constant, we want to see all the even or all the odd.
12780 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12781 || C1->getZExtValue() != nextIndex+1)
12782 return SDValue();
12783
12784 // Increment index.
12785 nextIndex+=2;
12786 } else
12787 return SDValue();
12788 }
12789
12790 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12791 // we're using the entire input vector, otherwise there's a size/legality
12792 // mismatch somewhere.
12793 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12795 return SDValue();
12796
12797 // Create VPADDL node.
12798 SelectionDAG &DAG = DCI.DAG;
12799 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12800
12801 SDLoc dl(N);
12802
12803 // Build operand list.
12805 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12806 TLI.getPointerTy(DAG.getDataLayout())));
12807
12808 // Input is the vector.
12809 Ops.push_back(Vec);
12810
12811 // Get widened type and narrowed type.
12812 MVT widenType;
12813 unsigned numElem = VT.getVectorNumElements();
12814
12815 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12816 switch (inputLaneType.getSimpleVT().SimpleTy) {
12817 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12818 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12819 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12820 default:
12821 llvm_unreachable("Invalid vector element type for padd optimization.");
12822 }
12823
12824 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12825 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12826 return DAG.getNode(ExtOp, dl, VT, tmp);
12827}
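
// Illustrative sketch, not part of the original file: the pattern the
// BUILD_VECTOR combine above matches, written at the C level. One BUILD_VECTOR
// gathers the even-indexed elements of a vector and the other the odd-indexed
// elements; adding them in a wider element type is NEON's pairwise add long
// (the signed vpaddl.s8 variant is shown). Element types and names are an
// arbitrary example.
static void vpaddlSketch(const signed char *In, short *Out,
                         unsigned OutElems) {
  for (unsigned I = 0; I < OutElems; ++I) {
    short Even = In[2 * I];     // EXTRACT_VECTOR_ELT at index 2*I
    short Odd = In[2 * I + 1];  // EXTRACT_VECTOR_ELT at index 2*I + 1
    Out[I] = Even + Odd;        // widened pairwise sum
  }
}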
12828
12830 if (V->getOpcode() == ISD::UMUL_LOHI ||
12831 V->getOpcode() == ISD::SMUL_LOHI)
12832 return V;
12833 return SDValue();
12834}
12835
12836static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12838 const ARMSubtarget *Subtarget) {
12839 if (!Subtarget->hasBaseDSP())
12840 return SDValue();
12841
12842 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12843 // accumulate the product into a 64-bit value. The 16-bit values will
12844 // be sign-extended in some way or SRA'd into 32-bit values.
12845 // (addc (adde (mul 16bit, 16bit), lo), hi)
12846 SDValue Mul = AddcNode->getOperand(0);
12847 SDValue Lo = AddcNode->getOperand(1);
12848 if (Mul.getOpcode() != ISD::MUL) {
12849 Lo = AddcNode->getOperand(0);
12850 Mul = AddcNode->getOperand(1);
12851 if (Mul.getOpcode() != ISD::MUL)
12852 return SDValue();
12853 }
12854
12855 SDValue SRA = AddeNode->getOperand(0);
12856 SDValue Hi = AddeNode->getOperand(1);
12857 if (SRA.getOpcode() != ISD::SRA) {
12858 SRA = AddeNode->getOperand(1);
12859 Hi = AddeNode->getOperand(0);
12860 if (SRA.getOpcode() != ISD::SRA)
12861 return SDValue();
12862 }
12863 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12864 if (Const->getZExtValue() != 31)
12865 return SDValue();
12866 } else
12867 return SDValue();
12868
12869 if (SRA.getOperand(0) != Mul)
12870 return SDValue();
12871
12872 SelectionDAG &DAG = DCI.DAG;
12873 SDLoc dl(AddcNode);
12874 unsigned Opcode = 0;
12875 SDValue Op0;
12876 SDValue Op1;
12877
12878 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12879 Opcode = ARMISD::SMLALBB;
12880 Op0 = Mul.getOperand(0);
12881 Op1 = Mul.getOperand(1);
12882 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12883 Opcode = ARMISD::SMLALBT;
12884 Op0 = Mul.getOperand(0);
12885 Op1 = Mul.getOperand(1).getOperand(0);
12886 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12887 Opcode = ARMISD::SMLALTB;
12888 Op0 = Mul.getOperand(0).getOperand(0);
12889 Op1 = Mul.getOperand(1);
12890 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12891 Opcode = ARMISD::SMLALTT;
12892 Op0 = Mul->getOperand(0).getOperand(0);
12893 Op1 = Mul->getOperand(1).getOperand(0);
12894 }
12895
12896 if (!Op0 || !Op1)
12897 return SDValue();
12898
12899 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12900 Op0, Op1, Lo, Hi);
12902 // Replace the ADD nodes' uses with the MLA node's values.
12902 SDValue HiMLALResult(SMLAL.getNode(), 1);
12903 SDValue LoMLALResult(SMLAL.getNode(), 0);
12904
12905 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12906 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12907
12908 // Return original node to notify the driver to stop replacing.
12909 SDValue resNode(AddcNode, 0);
12910 return resNode;
12911}
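
// Illustrative sketch, not part of the original file: the computation the
// SMLALBB/SMLALBT/SMLALTB/SMLALTT combine above targets. Two signed 16-bit
// values (the bottom or top halfword of each source register; the BB form is
// shown) are multiplied and the product is accumulated into a 64-bit value
// held in a lo/hi register pair. The function name is hypothetical.
static int64_t smlalbbSketch(int64_t Acc, int32_t Ra, int32_t Rb) {
  int16_t BottomA = static_cast<int16_t>(Ra); // "B": bottom halfword of Ra
  int16_t BottomB = static_cast<int16_t>(Rb); // "B": bottom halfword of Rb
  return Acc + static_cast<int64_t>(BottomA) * BottomB; // SMLALBB
}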
12912
12915 const ARMSubtarget *Subtarget) {
12916 // Look for multiply-add opportunities.
12917 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
12918 // each add node consumes a value from ISD::UMUL_LOHI and there is
12919 // a glue link from the first add to the second add.
12920 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE with
12921 // an S/UMLAL instruction.
12922 //                  UMUL_LOHI
12923 //                 / :lo    \ :hi
12924 //                V          \          [no multiline comment]
12925 //  loAdd ->  ADDC   |
12926 //                 \ :carry /
12927 //                  V      V
12928 //                    ADDE <- hiAdd
12929 //
12930 // In the special case where only the higher part of a signed result is used
12931 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12932 // a constant with the exact value of 0x80000000, we recognize we are dealing
12933 // with a "rounded multiply and add" (or subtract) and transform it into
12934 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
12935
12936 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12937 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12938 "Expect an ADDE or SUBE");
12939
12940 assert(AddeSubeNode->getNumOperands() == 3 &&
12941 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12942 "ADDE node has the wrong inputs");
12943
12944 // Check that we are chained to the right ADDC or SUBC node.
12945 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12946 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12947 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12948 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12949 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12950 return SDValue();
12951
12952 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12953 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12954
12955 // Check if the two operands are from the same mul_lohi node.
12956 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12957 return SDValue();
12958
12959 assert(AddcSubcNode->getNumValues() == 2 &&
12960 AddcSubcNode->getValueType(0) == MVT::i32 &&
12961 "Expect ADDC with two result values. First: i32");
12962
12963 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12964 // may be an SMLAL which multiplies two 16-bit values.
12965 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12966 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12967 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12968 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12969 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12970 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12971
12972 // Check for the triangle shape.
12973 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12974 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12975
12976 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12977 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12978 return SDValue();
12979
12980 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12981 bool IsLeftOperandMUL = false;
12982 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12983 if (MULOp == SDValue())
12984 MULOp = findMUL_LOHI(AddeSubeOp1);
12985 else
12986 IsLeftOperandMUL = true;
12987 if (MULOp == SDValue())
12988 return SDValue();
12989
12990 // Figure out the right opcode.
12991 unsigned Opc = MULOp->getOpcode();
12992 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12993
12994 // Figure out the high and low input values to the MLAL node.
12995 SDValue *HiAddSub = nullptr;
12996 SDValue *LoMul = nullptr;
12997 SDValue *LowAddSub = nullptr;
12998
12999 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
13000 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
13001 return SDValue();
13002
13003 if (IsLeftOperandMUL)
13004 HiAddSub = &AddeSubeOp1;
13005 else
13006 HiAddSub = &AddeSubeOp0;
13007
13008 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
13009 // whose low result is fed to the ADDC/SUBC we are checking.
13010
13011 if (AddcSubcOp0 == MULOp.getValue(0)) {
13012 LoMul = &AddcSubcOp0;
13013 LowAddSub = &AddcSubcOp1;
13014 }
13015 if (AddcSubcOp1 == MULOp.getValue(0)) {
13016 LoMul = &AddcSubcOp1;
13017 LowAddSub = &AddcSubcOp0;
13018 }
13019
13020 if (!LoMul)
13021 return SDValue();
13022
13023 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
13024 // the replacement below will create a cycle.
13025 if (AddcSubcNode == HiAddSub->getNode() ||
13026 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13027 return SDValue();
13028
13029 // Create the merged node.
13030 SelectionDAG &DAG = DCI.DAG;
13031
13032 // Start building operand list.
13034 Ops.push_back(LoMul->getOperand(0));
13035 Ops.push_back(LoMul->getOperand(1));
13036
13037 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13038 // the case, we must be doing signed multiplication and only use the higher
13039 // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
13040 // addition or subtraction with the value of 0x80000000.
13041 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13042 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13043 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13044 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13045 0x80000000) {
13046 Ops.push_back(*HiAddSub);
13047 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13048 FinalOpc = ARMISD::SMMLSR;
13049 } else {
13050 FinalOpc = ARMISD::SMMLAR;
13051 }
13052 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13053 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13054
13055 return SDValue(AddeSubeNode, 0);
13056 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13057 // SMMLS is generated during instruction selection and the rest of this
13058 // function cannot handle the case where AddcSubcNode is a SUBC.
13059 return SDValue();
13060
13061 // Finish building the operand list for {U/S}MLAL
13062 Ops.push_back(*LowAddSub);
13063 Ops.push_back(*HiAddSub);
13064
13065 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13066 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13067
13068 // Replace the ADD nodes' uses with the MLA node's values.
13069 SDValue HiMLALResult(MLALNode.getNode(), 1);
13070 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13071
13072 SDValue LoMLALResult(MLALNode.getNode(), 0);
13073 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13074
13075 // Return original node to notify the driver to stop replacing.
13076 return SDValue(AddeSubeNode, 0);
13077}
13078
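// Sketch of the fold below: UMAAL computes {hi,lo} = a * b + zext(c) +
// zext(d). A UMLAL whose high-half addend is zero therefore computes
// a * b + zext(c), and an ADDC/ADDE pair that adds one further 32-bit value d
// onto that 64-bit result collapses into a single UMAAL producing both
// halves.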
13079static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13080 TargetLowering::DAGCombinerInfo &DCI,
13081 const ARMSubtarget *Subtarget) {
13082 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13083 // While trying to combine for the other MLAL nodes, first search for the
13084 // chance to use UMAAL. Check if Addc uses a node which has already
13085 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13086 // as the addend, and it's handled in PerformUMLALCombine.
13087
13088 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13089 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13090
13091 // Check that we have a glued ADDC node.
13092 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13093 if (AddcNode->getOpcode() != ARMISD::ADDC)
13094 return SDValue();
13095
13096 // Find the converted UMAAL or quit if it doesn't exist.
13097 SDNode *UmlalNode = nullptr;
13098 SDValue AddHi;
13099 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13100 UmlalNode = AddcNode->getOperand(0).getNode();
13101 AddHi = AddcNode->getOperand(1);
13102 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13103 UmlalNode = AddcNode->getOperand(1).getNode();
13104 AddHi = AddcNode->getOperand(0);
13105 } else {
13106 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13107 }
13108
13109 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13110 // the ADDC as well as Zero.
13111 if (!isNullConstant(UmlalNode->getOperand(3)))
13112 return SDValue();
13113
13114 if ((isNullConstant(AddeNode->getOperand(0)) &&
13115 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13116 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13117 isNullConstant(AddeNode->getOperand(1)))) {
13118 SelectionDAG &DAG = DCI.DAG;
13119 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13120 UmlalNode->getOperand(2), AddHi };
13121 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13122 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13123
13124 // Replace the ADDC/ADDE nodes' uses with the UMAAL node's values.
13125 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13126 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13127
13128 // Return original node to notify the driver to stop replacing.
13129 return SDValue(AddeNode, 0);
13130 }
13131 return SDValue();
13132}
13133
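// The fold below starts from a UMLAL node instead: if its 64-bit accumulator
// is built by an ADDC/ADDE pair whose ADDE only adds the carry (both explicit
// addends are zero), the accumulator is just zext(x) + zext(y), so the
// UMLAL can be emitted as UMAAL(a, b, x, y).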
13134static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13135 const ARMSubtarget *Subtarget) {
13136 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13137 return SDValue();
13138
13139 // Check that we have a pair of ADDC and ADDE as operands.
13140 // Both addends of the ADDE must be zero.
13141 SDNode* AddcNode = N->getOperand(2).getNode();
13142 SDNode* AddeNode = N->getOperand(3).getNode();
13143 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13144 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13145 isNullConstant(AddeNode->getOperand(0)) &&
13146 isNullConstant(AddeNode->getOperand(1)) &&
13147 (AddeNode->getOperand(2).getNode() == AddcNode))
13148 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13149 DAG.getVTList(MVT::i32, MVT::i32),
13150 {N->getOperand(0), N->getOperand(1),
13151 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13152 else
13153 return SDValue();
13154}
13155
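// Two small cleanups are handled below. (SUBC (ADDE 0, 0, C), 1) exists only
// to reproduce the incoming carry C, so its flag result can be replaced by C
// directly. On Thumb1, an ADDC/SUBC with a negative immediate is rewritten as
// the opposite operation with the negated (positive) immediate, e.g.
// (ADDC x, -7) -> (SUBC x, 7), since only small positive immediates encode
// cheaply there.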
13156static SDValue PerformAddcSubcCombine(SDNode *N,
13157 TargetLowering::DAGCombinerInfo &DCI,
13158 const ARMSubtarget *Subtarget) {
13159 SelectionDAG &DAG(DCI.DAG);
13160
13161 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13162 // (SUBC (ADDE 0, 0, C), 1) -> C
13163 SDValue LHS = N->getOperand(0);
13164 SDValue RHS = N->getOperand(1);
13165 if (LHS->getOpcode() == ARMISD::ADDE &&
13166 isNullConstant(LHS->getOperand(0)) &&
13167 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13168 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13169 }
13170 }
13171
13172 if (Subtarget->isThumb1Only()) {
13173 SDValue RHS = N->getOperand(1);
13174 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13175 int32_t imm = C->getSExtValue();
13176 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13177 SDLoc DL(N);
13178 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13179 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13180 : ARMISD::ADDC;
13181 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13182 }
13183 }
13184 }
13185
13186 return SDValue();
13187}
13188
13189static SDValue PerformAddeSubeCombine(SDNode *N,
13190 TargetLowering::DAGCombinerInfo &DCI,
13191 const ARMSubtarget *Subtarget) {
13192 if (Subtarget->isThumb1Only()) {
13193 SelectionDAG &DAG = DCI.DAG;
13194 SDValue RHS = N->getOperand(1);
13195 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13196 int64_t imm = C->getSExtValue();
13197 if (imm < 0) {
13198 SDLoc DL(N);
13199
13200 // The with-carry-in form matches bitwise not instead of the negation.
13201 // Effectively, the inverse interpretation of the carry flag already
13202 // accounts for part of the negation.
13203 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13204
13205 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13206 : ARMISD::ADDE;
13207 return DAG.getNode(Opcode, DL, N->getVTList(),
13208 N->getOperand(0), RHS, N->getOperand(2));
13209 }
13210 }
13211 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13212 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13213 }
13214 return SDValue();
13215}
13216
13217static SDValue PerformSELECTCombine(SDNode *N,
13218 TargetLowering::DAGCombinerInfo &DCI,
13219 const ARMSubtarget *Subtarget) {
13220 if (!Subtarget->hasMVEIntegerOps())
13221 return SDValue();
13222
13223 SDLoc dl(N);
13224 SDValue SetCC;
13225 SDValue LHS;
13226 SDValue RHS;
13227 ISD::CondCode CC;
13228 SDValue TrueVal;
13229 SDValue FalseVal;
13230
13231 if (N->getOpcode() == ISD::SELECT &&
13232 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13233 SetCC = N->getOperand(0);
13234 LHS = SetCC->getOperand(0);
13235 RHS = SetCC->getOperand(1);
13236 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13237 TrueVal = N->getOperand(1);
13238 FalseVal = N->getOperand(2);
13239 } else if (N->getOpcode() == ISD::SELECT_CC) {
13240 LHS = N->getOperand(0);
13241 RHS = N->getOperand(1);
13242 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13243 TrueVal = N->getOperand(2);
13244 FalseVal = N->getOperand(3);
13245 } else {
13246 return SDValue();
13247 }
13248
13249 unsigned int Opcode = 0;
13250 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13251 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13252 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13253 Opcode = ARMISD::VMINVu;
13254 if (CC == ISD::SETUGT)
13255 std::swap(TrueVal, FalseVal);
13256 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13257 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13258 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13259 Opcode = ARMISD::VMINVs;
13260 if (CC == ISD::SETGT)
13261 std::swap(TrueVal, FalseVal);
13262 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13263 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13264 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13265 Opcode = ARMISD::VMAXVu;
13266 if (CC == ISD::SETULT)
13267 std::swap(TrueVal, FalseVal);
13268 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13269 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13270 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13271 Opcode = ARMISD::VMAXVs;
13272 if (CC == ISD::SETLT)
13273 std::swap(TrueVal, FalseVal);
13274 } else
13275 return SDValue();
13276
13277 // Normalise to the right hand side being the vector reduction
13278 switch (TrueVal->getOpcode()) {
13279 case ISD::VECREDUCE_UMIN:
13280 case ISD::VECREDUCE_SMIN:
13281 case ISD::VECREDUCE_UMAX:
13282 case ISD::VECREDUCE_SMAX:
13283 std::swap(LHS, RHS);
13284 std::swap(TrueVal, FalseVal);
13285 break;
13286 }
13287
13288 EVT VectorType = FalseVal->getOperand(0).getValueType();
13289
13290 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13291 VectorType != MVT::v4i32)
13292 return SDValue();
13293
13294 EVT VectorScalarType = VectorType.getVectorElementType();
13295
13296 // The values being selected must also be the ones being compared
13297 if (TrueVal != LHS || FalseVal != RHS)
13298 return SDValue();
13299
13300 EVT LeftType = LHS->getValueType(0);
13301 EVT RightType = RHS->getValueType(0);
13302
13303 // The types must match the reduced type too
13304 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13305 return SDValue();
13306
13307 // Legalise the scalar to an i32
13308 if (VectorScalarType != MVT::i32)
13309 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13310
13311 // Generate the reduction as an i32 for legalisation purposes
13312 auto Reduction =
13313 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13314
13315 // The result isn't actually an i32 so truncate it back to its original type
13316 if (VectorScalarType != MVT::i32)
13317 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13318
13319 return Reduction;
13320}
13321
13322// A special combine for the vqdmulh family of instructions. This is one of the
13323// potential set of patterns that could match this instruction. The base pattern
13324// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13325// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13326// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13327// the max is unnecessary.
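// As a concrete instance, for v8i16 inputs the matched form per lane is
//   smin(sra(mul(sext(x), sext(y)), 15), 32767)
// which produces the same lane values as an MVE VQDMULH.s16 (including
// x == y == -32768, where both sides clamp to 32767).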
13328static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13329 EVT VT = N->getValueType(0);
13330 SDValue Shft;
13331 ConstantSDNode *Clamp;
13332
13333 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13334 return SDValue();
13335
13336 if (N->getOpcode() == ISD::SMIN) {
13337 Shft = N->getOperand(0);
13338 Clamp = isConstOrConstSplat(N->getOperand(1));
13339 } else if (N->getOpcode() == ISD::VSELECT) {
13340 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13341 SDValue Cmp = N->getOperand(0);
13342 if (Cmp.getOpcode() != ISD::SETCC ||
13343 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13344 Cmp.getOperand(0) != N->getOperand(1) ||
13345 Cmp.getOperand(1) != N->getOperand(2))
13346 return SDValue();
13347 Shft = N->getOperand(1);
13348 Clamp = isConstOrConstSplat(N->getOperand(2));
13349 } else
13350 return SDValue();
13351
13352 if (!Clamp)
13353 return SDValue();
13354
13355 MVT ScalarType;
13356 int ShftAmt = 0;
13357 switch (Clamp->getSExtValue()) {
13358 case (1 << 7) - 1:
13359 ScalarType = MVT::i8;
13360 ShftAmt = 7;
13361 break;
13362 case (1 << 15) - 1:
13363 ScalarType = MVT::i16;
13364 ShftAmt = 15;
13365 break;
13366 case (1ULL << 31) - 1:
13367 ScalarType = MVT::i32;
13368 ShftAmt = 31;
13369 break;
13370 default:
13371 return SDValue();
13372 }
13373
13374 if (Shft.getOpcode() != ISD::SRA)
13375 return SDValue();
13376 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13377 if (!N1 || N1->getSExtValue() != ShftAmt)
13378 return SDValue();
13379
13380 SDValue Mul = Shft.getOperand(0);
13381 if (Mul.getOpcode() != ISD::MUL)
13382 return SDValue();
13383
13384 SDValue Ext0 = Mul.getOperand(0);
13385 SDValue Ext1 = Mul.getOperand(1);
13386 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13387 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13388 return SDValue();
13389 EVT VecVT = Ext0.getOperand(0).getValueType();
13390 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13391 return SDValue();
13392 if (Ext1.getOperand(0).getValueType() != VecVT ||
13393 VecVT.getScalarType() != ScalarType ||
13394 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13395 return SDValue();
13396
13397 SDLoc DL(Mul);
13398 unsigned LegalLanes = 128 / (ShftAmt + 1);
13399 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13400 // For types smaller than legal vectors extend to be legal and only use needed
13401 // lanes.
13402 if (VecVT.getSizeInBits() < 128) {
13403 EVT ExtVecVT =
13404 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13405 VecVT.getVectorNumElements());
13406 SDValue Inp0 =
13407 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13408 SDValue Inp1 =
13409 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13410 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13411 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13412 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13413 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13414 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13415 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13416 }
13417
13418 // For larger types, split into legal sized chunks.
13419 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13420 unsigned NumParts = VecVT.getSizeInBits() / 128;
13421 SmallVector<SDValue> Parts;
13422 for (unsigned I = 0; I < NumParts; ++I) {
13423 SDValue Inp0 =
13424 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13425 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13426 SDValue Inp1 =
13427 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13428 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13429 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13430 Parts.push_back(VQDMULH);
13431 }
13432 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13433 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13434}
13435
13436static SDValue PerformVSELECTCombine(SDNode *N,
13437 TargetLowering::DAGCombinerInfo &DCI,
13438 const ARMSubtarget *Subtarget) {
13439 if (!Subtarget->hasMVEIntegerOps())
13440 return SDValue();
13441
13442 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13443 return V;
13444
13445 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13446 //
13447 // We need to re-implement this optimization here as the implementation in the
13448 // Target-Independent DAGCombiner does not handle the kind of constant we make
13449 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13450 // good reason, allowing truncation there would break other targets).
13451 //
13452 // Currently, this is only done for MVE, as it's the only target that benefits
13453 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13454 if (N->getOperand(0).getOpcode() != ISD::XOR)
13455 return SDValue();
13456 SDValue XOR = N->getOperand(0);
13457
13458 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13459 // It is important to check with truncation allowed as the BUILD_VECTORs we
13460 // generate in those situations will truncate their operands.
13461 ConstantSDNode *Const =
13462 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13463 /*AllowTruncation*/ true);
13464 if (!Const || !Const->isOne())
13465 return SDValue();
13466
13467 // Rewrite into vselect(cond, rhs, lhs).
13468 SDValue Cond = XOR->getOperand(0);
13469 SDValue LHS = N->getOperand(1);
13470 SDValue RHS = N->getOperand(2);
13471 EVT Type = N->getValueType(0);
13472 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13473}
13474
13475// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
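// A VCTP produces a predicate whose first n lanes are active, which is
// exactly what comparing the lane-index vector [0,1,2,..] against splat(n)
// with an unsigned less-than yields, so the setcc can be replaced by the
// corresponding vctp intrinsic below.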
13476static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13477 TargetLowering::DAGCombinerInfo &DCI,
13478 const ARMSubtarget *Subtarget) {
13479 SDValue Op0 = N->getOperand(0);
13480 SDValue Op1 = N->getOperand(1);
13481 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13482 EVT VT = N->getValueType(0);
13483
13484 if (!Subtarget->hasMVEIntegerOps() ||
13485 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13486 return SDValue();
13487
13488 if (CC == ISD::SETUGE) {
13489 std::swap(Op0, Op1);
13490 CC = ISD::SETULT;
13491 }
13492
13493 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13494 Op0.getOpcode() != ISD::BUILD_VECTOR)
13495 return SDValue();
13496
13497 // Check first operand is BuildVector of 0,1,2,...
13498 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13499 if (!Op0.getOperand(I).isUndef() &&
13500 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13501 Op0.getConstantOperandVal(I) == I))
13502 return SDValue();
13503 }
13504
13505 // The second is a Splat of Op1S
13506 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13507 if (!Op1S)
13508 return SDValue();
13509
13510 unsigned Opc;
13511 switch (VT.getVectorNumElements()) {
13512 case 2:
13513 Opc = Intrinsic::arm_mve_vctp64;
13514 break;
13515 case 4:
13516 Opc = Intrinsic::arm_mve_vctp32;
13517 break;
13518 case 8:
13519 Opc = Intrinsic::arm_mve_vctp16;
13520 break;
13521 case 16:
13522 Opc = Intrinsic::arm_mve_vctp8;
13523 break;
13524 default:
13525 return SDValue();
13526 }
13527
13528 SDLoc DL(N);
13529 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13530 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13531 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13532}
13533
13534/// PerformADDECombine - Target-specific dag combine transform from
13535/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13536/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13537static SDValue PerformADDECombine(SDNode *N,
13538 TargetLowering::DAGCombinerInfo &DCI,
13539 const ARMSubtarget *Subtarget) {
13540 // Only ARM and Thumb2 support UMLAL/SMLAL.
13541 if (Subtarget->isThumb1Only())
13542 return PerformAddeSubeCombine(N, DCI, Subtarget);
13543
13544 // Only perform the checks after legalize when the pattern is available.
13545 if (DCI.isBeforeLegalize()) return SDValue();
13546
13547 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13548}
13549
13550/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13551/// operands N0 and N1. This is a helper for PerformADDCombine that is
13552/// called with the default operands, and if that fails, with commuted
13553/// operands.
13554static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13555 TargetLowering::DAGCombinerInfo &DCI,
13556 const ARMSubtarget *Subtarget){
13557 // Attempt to create vpadd for this add.
13558 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13559 return Result;
13560
13561 // Attempt to create vpaddl for this add.
13562 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13563 return Result;
13564 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13565 Subtarget))
13566 return Result;
13567
13568 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13569 if (N0.getNode()->hasOneUse())
13570 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13571 return Result;
13572 return SDValue();
13573}
13574
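// The reassociations below aim to leave at most one vector reduction as a
// direct operand of each scalar add, so every reduction can fold into an
// accumulating VADDVA/VMLAVA form instead of one of them surviving as a
// standalone VADDV plus a scalar add.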
13575static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13576 EVT VT = N->getValueType(0);
13577 SDValue N0 = N->getOperand(0);
13578 SDValue N1 = N->getOperand(1);
13579 SDLoc dl(N);
13580
13581 auto IsVecReduce = [](SDValue Op) {
13582 switch (Op.getOpcode()) {
13583 case ISD::VECREDUCE_ADD:
13584 case ARMISD::VADDVs:
13585 case ARMISD::VADDVu:
13586 case ARMISD::VMLAVs:
13587 case ARMISD::VMLAVu:
13588 return true;
13589 }
13590 return false;
13591 };
13592
13593 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13594 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13595 // add(add(X, vecreduce(Y)), vecreduce(Z))
13596 // to make better use of vaddva style instructions.
13597 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13598 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13599 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13600 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13601 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13602 }
13603 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13604 // add(add(add(A, C), reduce(B)), reduce(D))
13605 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13606 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13607 unsigned N0RedOp = 0;
13608 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13609 N0RedOp = 1;
13610 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13611 return SDValue();
13612 }
13613
13614 unsigned N1RedOp = 0;
13615 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13616 N1RedOp = 1;
13617 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13618 return SDValue();
13619
13620 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13621 N1.getOperand(1 - N1RedOp));
13622 SDValue Add1 =
13623 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13624 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13625 }
13626 return SDValue();
13627 };
13628 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13629 return R;
13630 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13631 return R;
13632
13633 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13634 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13635 // by ascending load offsets. This can help cores prefetch if the order of
13636 // loads is more predictable.
13637 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13638 // Check if two reductions are known to load data where one is before/after
13639 // another. Return negative if N0 loads data before N1, positive if N1 is
13640 // before N0, and 0 if nothing is known.
13641 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13642 // Look through to the first operand of a MUL, for the VMLA case.
13643 // Currently only looks at the first operand, in the hope they are equal.
13644 if (N0.getOpcode() == ISD::MUL)
13645 N0 = N0.getOperand(0);
13646 if (N1.getOpcode() == ISD::MUL)
13647 N1 = N1.getOperand(0);
13648
13649 // Return true if the two operands are loads to the same object and the
13650 // offset of the first is known to be less than the offset of the second.
13651 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13652 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13653 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13654 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13655 Load1->isIndexed())
13656 return 0;
13657
13658 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13659 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13660
13661 if (!BaseLocDecomp0.getBase() ||
13662 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13663 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13664 return 0;
13665 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13666 return -1;
13667 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13668 return 1;
13669 return 0;
13670 };
13671
13672 SDValue X;
13673 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13674 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13675 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13676 N0.getOperand(1).getOperand(0));
13677 if (IsBefore < 0) {
13678 X = N0.getOperand(0);
13679 N0 = N0.getOperand(1);
13680 } else if (IsBefore > 0) {
13681 X = N0.getOperand(1);
13682 N0 = N0.getOperand(0);
13683 } else
13684 return SDValue();
13685 } else if (IsVecReduce(N0.getOperand(0))) {
13686 X = N0.getOperand(1);
13687 N0 = N0.getOperand(0);
13688 } else if (IsVecReduce(N0.getOperand(1))) {
13689 X = N0.getOperand(0);
13690 N0 = N0.getOperand(1);
13691 } else
13692 return SDValue();
13693 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13694 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13695 // Note this is backward to how you would expect. We create
13696 // add(reduce(load + 16), reduce(load + 0)) so that the
13697 // add(reduce(load+16), X) is combined into VADDVA(X, load+16), leaving
13698 // the X as VADDV(load + 0)
13699 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13700 } else
13701 return SDValue();
13702
13703 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13704 return SDValue();
13705
13706 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13707 return SDValue();
13708
13709 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13710 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13711 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13712 };
13713 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13714 return R;
13715 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13716 return R;
13717 return SDValue();
13718}
13719
13720static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13721 const ARMSubtarget *Subtarget) {
13722 if (!Subtarget->hasMVEIntegerOps())
13723 return SDValue();
13724
13725 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13726 return R;
13727
13728 EVT VT = N->getValueType(0);
13729 SDValue N0 = N->getOperand(0);
13730 SDValue N1 = N->getOperand(1);
13731 SDLoc dl(N);
13732
13733 if (VT != MVT::i64)
13734 return SDValue();
13735
13736 // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
13737 // will look like:
13738 // t1: i32,i32 = ARMISD::VADDLVs x
13739 // t2: i64 = build_pair t1, t1:1
13740 // t3: i64 = add t2, y
13741 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13742 // the add to be simplified separately.
13743 // We also need to check for sext / zext and commutative adds.
13744 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13745 SDValue NB) {
13746 if (NB->getOpcode() != ISD::BUILD_PAIR)
13747 return SDValue();
13748 SDValue VecRed = NB->getOperand(0);
13749 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13750 VecRed.getResNo() != 0 ||
13751 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13752 return SDValue();
13753
13754 if (VecRed->getOpcode() == OpcodeA) {
13755 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13756 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13757 VecRed.getOperand(0), VecRed.getOperand(1));
13758 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13759 }
13760
13761 SmallVector<SDValue, 4> Ops(2);
13762 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13763
13764 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13765 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13766 Ops.push_back(VecRed->getOperand(I));
13767 SDValue Red =
13768 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13769 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13770 SDValue(Red.getNode(), 1));
13771 };
13772
13773 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13774 return M;
13775 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13776 return M;
13777 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13778 return M;
13779 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13780 return M;
13781 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13782 return M;
13783 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13784 return M;
13785 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13786 return M;
13787 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13788 return M;
13789 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13790 return M;
13791 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13792 return M;
13793 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13794 return M;
13795 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13796 return M;
13797 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13798 return M;
13799 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13800 return M;
13801 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13802 return M;
13803 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13804 return M;
13805 return SDValue();
13806}
13807
13808bool
13809ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13810 CombineLevel Level) const {
13811 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13812 N->getOpcode() == ISD::SRL) &&
13813 "Expected shift op");
13814
13815 if (Level == BeforeLegalizeTypes)
13816 return true;
13817
13818 if (N->getOpcode() != ISD::SHL)
13819 return true;
13820
13821 if (Subtarget->isThumb1Only()) {
13822 // Avoid making expensive immediates by commuting shifts. (This logic
13823 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13824 // for free.)
13825 if (N->getOpcode() != ISD::SHL)
13826 return true;
13827 SDValue N1 = N->getOperand(0);
13828 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13829 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13830 return true;
13831 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13832 if (Const->getAPIntValue().ult(256))
13833 return false;
13834 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13835 Const->getAPIntValue().sgt(-256))
13836 return false;
13837 }
13838 return true;
13839 }
13840
13841 // Turn off commute-with-shift transform after legalization, so it doesn't
13842 // conflict with PerformSHLSimplify. (We could try to detect when
13843 // PerformSHLSimplify would trigger more precisely, but it isn't
13844 // really necessary.)
13845 return false;
13846}
13847
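// For example, for i32 operands xor(shl(x, 8), 0xffffff00) passes the check
// below (MaskIdx == 8 == ShiftAmt and MaskLen == 24 == 32 - 8): the NOT mask
// covers exactly the bits produced by the shift, so the xor can be commuted
// with it.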
13848bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13849 const SDNode *N) const {
13850 assert(N->getOpcode() == ISD::XOR &&
13851 (N->getOperand(0).getOpcode() == ISD::SHL ||
13852 N->getOperand(0).getOpcode() == ISD::SRL) &&
13853 "Expected XOR(SHIFT) pattern");
13854
13855 // Only commute if the entire NOT mask is a hidden shifted mask.
13856 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13857 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13858 if (XorC && ShiftC) {
13859 unsigned MaskIdx, MaskLen;
13860 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13861 unsigned ShiftAmt = ShiftC->getZExtValue();
13862 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13863 if (N->getOperand(0).getOpcode() == ISD::SHL)
13864 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13865 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13866 }
13867 }
13868
13869 return false;
13870}
13871
13872bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13873 const SDNode *N, CombineLevel Level) const {
13874 assert(((N->getOpcode() == ISD::SHL &&
13875 N->getOperand(0).getOpcode() == ISD::SRL) ||
13876 (N->getOpcode() == ISD::SRL &&
13877 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13878 "Expected shift-shift mask");
13879
13880 if (!Subtarget->isThumb1Only())
13881 return true;
13882
13883 if (Level == BeforeLegalizeTypes)
13884 return true;
13885
13886 return false;
13887}
13888
13889bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
13890 EVT VT) const {
13891 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13892}
13893
13894bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13895 if (!Subtarget->hasNEON()) {
13896 if (Subtarget->isThumb1Only())
13897 return VT.getScalarSizeInBits() <= 32;
13898 return true;
13899 }
13900 return VT.isScalarInteger();
13901}
13902
13903bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13904 EVT VT) const {
13905 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13906 return false;
13907
13908 switch (FPVT.getSimpleVT().SimpleTy) {
13909 case MVT::f16:
13910 return Subtarget->hasVFP2Base();
13911 case MVT::f32:
13912 return Subtarget->hasVFP2Base();
13913 case MVT::f64:
13914 return Subtarget->hasFP64();
13915 case MVT::v4f32:
13916 case MVT::v8f16:
13917 return Subtarget->hasMVEFloatOps();
13918 default:
13919 return false;
13920 }
13921}
13922
13923static SDValue PerformSHLSimplify(SDNode *N,
13924 TargetLowering::DAGCombinerInfo &DCI,
13925 const ARMSubtarget *ST) {
13926 // Allow the generic combiner to identify potential bswaps.
13927 if (DCI.isBeforeLegalize())
13928 return SDValue();
13929
13930 // DAG combiner will fold:
13931 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13932 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13933 // Other code patterns that can also be modified have the following form:
13934 // b + ((a << 1) | 510)
13935 // b + ((a << 1) & 510)
13936 // b + ((a << 1) ^ 510)
13937 // b + ((a << 1) + 510)
13938
13939 // Many instructions can perform the shift for free, but it requires both
13940 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13941 // instruction will be needed. So, unfold back to the original pattern if:
13942 // - c1 and c2 are small enough that they don't require mov imms.
13943 // - the user(s) of the node can perform an shl
13944
13945 // No shifted operands for 16-bit instructions.
13946 if (ST->isThumb() && ST->isThumb1Only())
13947 return SDValue();
13948
13949 // Check that all the users could perform the shl themselves.
13950 for (auto *U : N->uses()) {
13951 switch(U->getOpcode()) {
13952 default:
13953 return SDValue();
13954 case ISD::SUB:
13955 case ISD::ADD:
13956 case ISD::AND:
13957 case ISD::OR:
13958 case ISD::XOR:
13959 case ISD::SETCC:
13960 case ARMISD::CMP:
13961 // Check that the user isn't already using a constant because there
13962 // aren't any instructions that support an immediate operand and a
13963 // shifted operand.
13964 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13965 isa<ConstantSDNode>(U->getOperand(1)))
13966 return SDValue();
13967
13968 // Check that it's not already using a shift.
13969 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13970 U->getOperand(1).getOpcode() == ISD::SHL)
13971 return SDValue();
13972 break;
13973 }
13974 }
13975
13976 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13977 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13978 return SDValue();
13979
13980 if (N->getOperand(0).getOpcode() != ISD::SHL)
13981 return SDValue();
13982
13983 SDValue SHL = N->getOperand(0);
13984
13985 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13986 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13987 if (!C1ShlC2 || !C2)
13988 return SDValue();
13989
13990 APInt C2Int = C2->getAPIntValue();
13991 APInt C1Int = C1ShlC2->getAPIntValue();
13992 unsigned C2Width = C2Int.getBitWidth();
13993 if (C2Int.uge(C2Width))
13994 return SDValue();
13995 uint64_t C2Value = C2Int.getZExtValue();
13996
13997 // Check that performing a lshr will not lose any information.
13998 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13999 if ((C1Int & Mask) != C1Int)
14000 return SDValue();
14001
14002 // Shift the first constant.
14003 C1Int.lshrInPlace(C2Int);
14004
14005 // The immediates are encoded as an 8-bit value that can be rotated.
14006 auto LargeImm = [](const APInt &Imm) {
14007 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
14008 return Imm.getBitWidth() - Zeros > 8;
14009 };
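// (LargeImm is a quick span-based heuristic for the ARM/Thumb2 modified
// immediate encodings, which are formed from an 8-bit value rotated into
// place: a constant whose set bits span more than 8 positions is treated as
// needing a separate mov.)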
14010
14011 if (LargeImm(C1Int) || LargeImm(C2Int))
14012 return SDValue();
14013
14014 SelectionDAG &DAG = DCI.DAG;
14015 SDLoc dl(N);
14016 SDValue X = SHL.getOperand(0);
14017 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14018 DAG.getConstant(C1Int, dl, MVT::i32));
14019 // Shift left to compensate for the lshr of C1Int.
14020 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
14021
14022 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14023 SHL.dump(); N->dump());
14024 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14025 return Res;
14026}
14027
14028
14029/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14030///
14031static SDValue PerformADDCombine(SDNode *N,
14032 TargetLowering::DAGCombinerInfo &DCI,
14033 const ARMSubtarget *Subtarget) {
14034 SDValue N0 = N->getOperand(0);
14035 SDValue N1 = N->getOperand(1);
14036
14037 // Only works one way, because it needs an immediate operand.
14038 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14039 return Result;
14040
14041 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14042 return Result;
14043
14044 // First try with the default operand order.
14045 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14046 return Result;
14047
14048 // If that didn't work, try again with the operands commuted.
14049 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14050}
14051
14052// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14053// providing -X is as cheap as X (currently, just a constant).
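// This holds because csinc(X, Y, CC) is X when CC is true and Y + 1
// otherwise, so negating it yields -X in the first case and
// -(Y + 1) == ~Y in the second, i.e. csinv(-X, Y, CC).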
14054static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14055 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14056 return SDValue();
14057 SDValue CSINC = N->getOperand(1);
14058 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14059 return SDValue();
14060
14061 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14062 if (!X)
14063 return SDValue();
14064
14065 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14066 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14067 CSINC.getOperand(0)),
14068 CSINC.getOperand(1), CSINC.getOperand(2),
14069 CSINC.getOperand(3));
14070}
14071
14072/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14073///
14074static SDValue PerformSUBCombine(SDNode *N,
14075 TargetLowering::DAGCombinerInfo &DCI,
14076 const ARMSubtarget *Subtarget) {
14077 SDValue N0 = N->getOperand(0);
14078 SDValue N1 = N->getOperand(1);
14079
14080 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14081 if (N1.getNode()->hasOneUse())
14082 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14083 return Result;
14084
14085 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14086 return R;
14087
14088 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14089 return SDValue();
14090
14091 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14092 // so that we can readily pattern match more mve instructions which can use
14093 // a scalar operand.
14094 SDValue VDup = N->getOperand(1);
14095 if (VDup->getOpcode() != ARMISD::VDUP)
14096 return SDValue();
14097
14098 SDValue VMov = N->getOperand(0);
14099 if (VMov->getOpcode() == ISD::BITCAST)
14100 VMov = VMov->getOperand(0);
14101
14102 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14103 return SDValue();
14104
14105 SDLoc dl(N);
14106 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14107 DCI.DAG.getConstant(0, dl, MVT::i32),
14108 VDup->getOperand(0));
14109 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14110}
14111
14112/// PerformVMULCombine
14113/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14114/// special multiplier accumulator forwarding.
14115/// vmul d3, d0, d2
14116/// vmla d3, d1, d2
14117/// is faster than
14118/// vadd d3, d0, d1
14119/// vmul d3, d3, d2
14120// However, for (A + B) * (A + B),
14121// vadd d2, d0, d1
14122// vmul d3, d0, d2
14123// vmla d3, d1, d2
14124// is slower than
14125// vadd d2, d0, d1
14126// vmul d3, d2, d2
14127static SDValue PerformVMULCombine(SDNode *N,
14128 TargetLowering::DAGCombinerInfo &DCI,
14129 const ARMSubtarget *Subtarget) {
14130 if (!Subtarget->hasVMLxForwarding())
14131 return SDValue();
14132
14133 SelectionDAG &DAG = DCI.DAG;
14134 SDValue N0 = N->getOperand(0);
14135 SDValue N1 = N->getOperand(1);
14136 unsigned Opcode = N0.getOpcode();
14137 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14138 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14139 Opcode = N1.getOpcode();
14140 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14141 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14142 return SDValue();
14143 std::swap(N0, N1);
14144 }
14145
14146 if (N0 == N1)
14147 return SDValue();
14148
14149 EVT VT = N->getValueType(0);
14150 SDLoc DL(N);
14151 SDValue N00 = N0->getOperand(0);
14152 SDValue N01 = N0->getOperand(1);
14153 return DAG.getNode(Opcode, DL, VT,
14154 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14155 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14156}
14157
14158static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14159 const ARMSubtarget *Subtarget) {
14160 EVT VT = N->getValueType(0);
14161 if (VT != MVT::v2i64)
14162 return SDValue();
14163
14164 SDValue N0 = N->getOperand(0);
14165 SDValue N1 = N->getOperand(1);
14166
14167 auto IsSignExt = [&](SDValue Op) {
14168 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14169 return SDValue();
14170 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14171 if (VT.getScalarSizeInBits() == 32)
14172 return Op->getOperand(0);
14173 return SDValue();
14174 };
14175 auto IsZeroExt = [&](SDValue Op) {
14176 // Zero extends are a little more awkward. At the point we are matching
14177 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14178 // That might be before or after a bitcast depending on how the and is
14179 // placed. Because this has to look through bitcasts, it is currently only
14180 // supported on LE.
14181 if (!Subtarget->isLittle())
14182 return SDValue();
14183
14184 SDValue And = Op;
14185 if (And->getOpcode() == ISD::BITCAST)
14186 And = And->getOperand(0);
14187 if (And->getOpcode() != ISD::AND)
14188 return SDValue();
14189 SDValue Mask = And->getOperand(1);
14190 if (Mask->getOpcode() == ISD::BITCAST)
14191 Mask = Mask->getOperand(0);
14192
14193 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14194 Mask.getValueType() != MVT::v4i32)
14195 return SDValue();
14196 if (isAllOnesConstant(Mask->getOperand(0)) &&
14197 isNullConstant(Mask->getOperand(1)) &&
14198 isAllOnesConstant(Mask->getOperand(2)) &&
14199 isNullConstant(Mask->getOperand(3)))
14200 return And->getOperand(0);
14201 return SDValue();
14202 };
14203
14204 SDLoc dl(N);
14205 if (SDValue Op0 = IsSignExt(N0)) {
14206 if (SDValue Op1 = IsSignExt(N1)) {
14207 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14208 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14209 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14210 }
14211 }
14212 if (SDValue Op0 = IsZeroExt(N0)) {
14213 if (SDValue Op1 = IsZeroExt(N1)) {
14214 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14215 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14216 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14217 }
14218 }
14219
14220 return SDValue();
14221}
14222
14223static SDValue PerformMULCombine(SDNode *N,
14224 TargetLowering::DAGCombinerInfo &DCI,
14225 const ARMSubtarget *Subtarget) {
14226 SelectionDAG &DAG = DCI.DAG;
14227
14228 EVT VT = N->getValueType(0);
14229 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14230 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14231
14232 if (Subtarget->isThumb1Only())
14233 return SDValue();
14234
14235 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14236 return SDValue();
14237
14238 if (VT.is64BitVector() || VT.is128BitVector())
14239 return PerformVMULCombine(N, DCI, Subtarget);
14240 if (VT != MVT::i32)
14241 return SDValue();
14242
14243 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14244 if (!C)
14245 return SDValue();
14246
14247 int64_t MulAmt = C->getSExtValue();
14248 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14249
14250 ShiftAmt = ShiftAmt & (32 - 1);
14251 SDValue V = N->getOperand(0);
14252 SDLoc DL(N);
14253
14254 SDValue Res;
14255 MulAmt >>= ShiftAmt;
14256
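// At this point the original multiplier has been split into
// (MulAmt << ShiftAmt) with the trailing zero bits stripped off; the
// remaining factor is matched against 2^N +/- 1 below and the power-of-two
// part is reapplied as a final shift.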
14257 if (MulAmt >= 0) {
14258 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14259 // (mul x, 2^N + 1) => (add (shl x, N), x)
14260 Res = DAG.getNode(ISD::ADD, DL, VT,
14261 V,
14262 DAG.getNode(ISD::SHL, DL, VT,
14263 V,
14264 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14265 MVT::i32)));
14266 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14267 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14268 Res = DAG.getNode(ISD::SUB, DL, VT,
14269 DAG.getNode(ISD::SHL, DL, VT,
14270 V,
14271 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14272 MVT::i32)),
14273 V);
14274 } else
14275 return SDValue();
14276 } else {
14277 uint64_t MulAmtAbs = -MulAmt;
14278 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14279 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14280 Res = DAG.getNode(ISD::SUB, DL, VT,
14281 V,
14282 DAG.getNode(ISD::SHL, DL, VT,
14283 V,
14284 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14285 MVT::i32)));
14286 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14287 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14288 Res = DAG.getNode(ISD::ADD, DL, VT,
14289 V,
14290 DAG.getNode(ISD::SHL, DL, VT,
14291 V,
14292 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14293 MVT::i32)));
14294 Res = DAG.getNode(ISD::SUB, DL, VT,
14295 DAG.getConstant(0, DL, MVT::i32), Res);
14296 } else
14297 return SDValue();
14298 }
14299
14300 if (ShiftAmt != 0)
14301 Res = DAG.getNode(ISD::SHL, DL, VT,
14302 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14303
14304 // Do not add new nodes to DAG combiner worklist.
14305 DCI.CombineTo(N, Res, false);
14306 return SDValue();
14307}
14308
14309static SDValue CombineANDShift(SDNode *N,
14310 TargetLowering::DAGCombinerInfo &DCI,
14311 const ARMSubtarget *Subtarget) {
14312 // Allow DAGCombine to pattern-match before we touch the canonical form.
14313 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14314 return SDValue();
14315
14316 if (N->getValueType(0) != MVT::i32)
14317 return SDValue();
14318
14319 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14320 if (!N1C)
14321 return SDValue();
14322
14323 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14324 // Don't transform uxtb/uxth.
14325 if (C1 == 255 || C1 == 65535)
14326 return SDValue();
14327
14328 SDNode *N0 = N->getOperand(0).getNode();
14329 if (!N0->hasOneUse())
14330 return SDValue();
14331
14332 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14333 return SDValue();
14334
14335 bool LeftShift = N0->getOpcode() == ISD::SHL;
14336
14337 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14338 if (!N01C)
14339 return SDValue();
14340
14341 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14342 if (!C2 || C2 >= 32)
14343 return SDValue();
14344
14345 // Clear irrelevant bits in the mask.
14346 if (LeftShift)
14347 C1 &= (-1U << C2);
14348 else
14349 C1 &= (-1U >> C2);
14350
14351 SelectionDAG &DAG = DCI.DAG;
14352 SDLoc DL(N);
14353
14354 // We have a pattern of the form "(and (shl x, c2) c1)" or
14355 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14356 // transform to a pair of shifts, to save materializing c1.
14357
14358 // First pattern: right shift, then mask off leading bits.
14359 // FIXME: Use demanded bits?
14360 if (!LeftShift && isMask_32(C1)) {
14361 uint32_t C3 = llvm::countl_zero(C1);
14362 if (C2 < C3) {
14363 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14364 DAG.getConstant(C3 - C2, DL, MVT::i32));
14365 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14366 DAG.getConstant(C3, DL, MVT::i32));
14367 }
14368 }
14369
14370 // First pattern, reversed: left shift, then mask off trailing bits.
14371 if (LeftShift && isMask_32(~C1)) {
14372 uint32_t C3 = llvm::countr_zero(C1);
14373 if (C2 < C3) {
14374 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14375 DAG.getConstant(C3 - C2, DL, MVT::i32));
14376 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14377 DAG.getConstant(C3, DL, MVT::i32));
14378 }
14379 }
14380
14381 // Second pattern: left shift, then mask off leading bits.
14382 // FIXME: Use demanded bits?
14383 if (LeftShift && isShiftedMask_32(C1)) {
14384 uint32_t Trailing = llvm::countr_zero(C1);
14385 uint32_t C3 = llvm::countl_zero(C1);
14386 if (Trailing == C2 && C2 + C3 < 32) {
14387 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14388 DAG.getConstant(C2 + C3, DL, MVT::i32));
14389 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14390 DAG.getConstant(C3, DL, MVT::i32));
14391 }
14392 }
14393
14394 // Second pattern, reversed: right shift, then mask off trailing bits.
14395 // FIXME: Handle other patterns of known/demanded bits.
14396 if (!LeftShift && isShiftedMask_32(C1)) {
14397 uint32_t Leading = llvm::countl_zero(C1);
14398 uint32_t C3 = llvm::countr_zero(C1);
14399 if (Leading == C2 && C2 + C3 < 32) {
14400 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14401 DAG.getConstant(C2 + C3, DL, MVT::i32));
14402 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14403 DAG.getConstant(C3, DL, MVT::i32));
14404 }
14405 }
14406
14407 // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
14408 // if "c1 >> c2" is a cheaper immediate than "c1"
14409 if (LeftShift &&
14410 HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {
14411
14412 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14413 DAG.getConstant(C1 >> C2, DL, MVT::i32));
14414 return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
14415 DAG.getConstant(C2, DL, MVT::i32));
14416 }
14417
14418 return SDValue();
14419}
14420
14421static SDValue PerformANDCombine(SDNode *N,
14422 TargetLowering::DAGCombinerInfo &DCI,
14423 const ARMSubtarget *Subtarget) {
14424 // Attempt to use immediate-form VBIC
14425 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14426 SDLoc dl(N);
14427 EVT VT = N->getValueType(0);
14428 SelectionDAG &DAG = DCI.DAG;
14429
14430 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14431 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14432 return SDValue();
14433
14434 APInt SplatBits, SplatUndef;
14435 unsigned SplatBitSize;
14436 bool HasAnyUndefs;
14437 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14438 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14439 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14440 SplatBitSize == 64) {
14441 EVT VbicVT;
14442 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14443 SplatUndef.getZExtValue(), SplatBitSize,
14444 DAG, dl, VbicVT, VT, OtherModImm);
14445 if (Val.getNode()) {
14446 SDValue Input =
14447 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14448 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14449 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14450 }
14451 }
14452 }
14453
14454 if (!Subtarget->isThumb1Only()) {
14455 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14456 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14457 return Result;
14458
14459 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14460 return Result;
14461 }
14462
14463 if (Subtarget->isThumb1Only())
14464 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14465 return Result;
14466
14467 return SDValue();
14468}
14469
14470// Try combining OR nodes to SMULWB, SMULWT.
14471static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14472 TargetLowering::DAGCombinerInfo &DCI,
14473 const ARMSubtarget *Subtarget) {
14474 if (!Subtarget->hasV6Ops() ||
14475 (Subtarget->isThumb() &&
14476 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14477 return SDValue();
14478
14479 SDValue SRL = OR->getOperand(0);
14480 SDValue SHL = OR->getOperand(1);
14481
14482 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14483 SRL = OR->getOperand(1);
14484 SHL = OR->getOperand(0);
14485 }
14486 if (!isSRL16(SRL) || !isSHL16(SHL))
14487 return SDValue();
14488
14489 // The first operands to the shifts need to be the two results from the
14490 // same smul_lohi node.
14491 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14492 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14493 return SDValue();
14494
14495 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14496 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14497 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14498 return SDValue();
14499
14500 // Now we have:
14501 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14502 // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14503 // For SMULWB the 16-bit value will be sign extended somehow.
14504 // For SMULWT only the SRA is required.
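// (SMULWB/SMULWT multiply a 32-bit register by the bottom/top half of another
// register and keep bits [47:16] of the 48-bit product, which is the value
// reassembled above from the two smul_lohi halves by the srl/shl/or.)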
14505 // Check both sides of SMUL_LOHI
14506 SDValue OpS16 = SMULLOHI->getOperand(0);
14507 SDValue OpS32 = SMULLOHI->getOperand(1);
14508
14509 SelectionDAG &DAG = DCI.DAG;
14510 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14511 OpS16 = OpS32;
14512 OpS32 = SMULLOHI->getOperand(0);
14513 }
14514
14515 SDLoc dl(OR);
14516 unsigned Opcode = 0;
14517 if (isS16(OpS16, DAG))
14518 Opcode = ARMISD::SMULWB;
14519 else if (isSRA16(OpS16)) {
14520 Opcode = ARMISD::SMULWT;
14521 OpS16 = OpS16->getOperand(0);
14522 }
14523 else
14524 return SDValue();
14525
14526 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14527 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14528 return SDValue(OR, 0);
14529}
14530
14531static SDValue PerformORCombineToBFI(SDNode *N,
14532 TargetLowering::DAGCombinerInfo &DCI,
14533 const ARMSubtarget *Subtarget) {
14534 // BFI is only available on V6T2+
14535 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14536 return SDValue();
14537
14538 EVT VT = N->getValueType(0);
14539 SDValue N0 = N->getOperand(0);
14540 SDValue N1 = N->getOperand(1);
14541 SelectionDAG &DAG = DCI.DAG;
14542 SDLoc DL(N);
14543 // 1) or (and A, mask), val => ARMbfi A, val, mask
14544 // iff (val & mask) == val
14545 //
14546 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14547 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14548 // && mask == ~mask2
14549 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14550 // && ~mask == mask2
14551 // (i.e., copy a bitfield value into another bitfield of the same width)
14552
14553 if (VT != MVT::i32)
14554 return SDValue();
14555
14556 SDValue N00 = N0.getOperand(0);
14557
14558 // The value and the mask need to be constants so we can verify this is
14559 // actually a bitfield set. If the mask is 0xffff, we can do better
14560 // via a movt instruction, so don't use BFI in that case.
14561 SDValue MaskOp = N0.getOperand(1);
14562 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14563 if (!MaskC)
14564 return SDValue();
14565 unsigned Mask = MaskC->getZExtValue();
14566 if (Mask == 0xffff)
14567 return SDValue();
14568 SDValue Res;
14569 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14570 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14571 if (N1C) {
14572 unsigned Val = N1C->getZExtValue();
14573 if ((Val & ~Mask) != Val)
14574 return SDValue();
14575
14576 if (ARM::isBitFieldInvertedMask(Mask)) {
14577 Val >>= llvm::countr_zero(~Mask);
14578
14579 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14580 DAG.getConstant(Val, DL, MVT::i32),
14581 DAG.getConstant(Mask, DL, MVT::i32));
14582
14583 DCI.CombineTo(N, Res, false);
14584 // Return value from the original node to inform the combiner that N is
14585 // now dead.
14586 return SDValue(N, 0);
14587 }
14588 } else if (N1.getOpcode() == ISD::AND) {
14589 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14590 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14591 if (!N11C)
14592 return SDValue();
14593 unsigned Mask2 = N11C->getZExtValue();
14594
14595 // Mask and ~Mask2 (or their reverse) must be equivalent for the BFI
14596 // pattern to match.
14597 if (ARM::isBitFieldInvertedMask(Mask) &&
14598 (Mask == ~Mask2)) {
14599 // The pack halfword instruction works better for masks that fit it,
14600 // so use that when it's available.
14601 if (Subtarget->hasDSP() &&
14602 (Mask == 0xffff || Mask == 0xffff0000))
14603 return SDValue();
14604 // 2a
14605 unsigned amt = llvm::countr_zero(Mask2);
14606 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14607 DAG.getConstant(amt, DL, MVT::i32));
14608 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14609 DAG.getConstant(Mask, DL, MVT::i32));
14610 DCI.CombineTo(N, Res, false);
14611 // Return value from the original node to inform the combiner that N is
14612 // now dead.
14613 return SDValue(N, 0);
14614 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14615 (~Mask == Mask2)) {
14616 // The pack halfword instruction works better for masks that fit it,
14617 // so use that when it's available.
14618 if (Subtarget->hasDSP() &&
14619 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14620 return SDValue();
14621 // 2b
14622 unsigned lsb = llvm::countr_zero(Mask);
14623 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14624 DAG.getConstant(lsb, DL, MVT::i32));
14625 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14626 DAG.getConstant(Mask2, DL, MVT::i32));
14627 DCI.CombineTo(N, Res, false);
14628 // Return value from the original node to inform the combiner that N is
14629 // now dead.
14630 return SDValue(N, 0);
14631 }
14632 }
14633
14634 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14635 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14636 ARM::isBitFieldInvertedMask(Mask)) {
14637 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14638 // where lsb(mask) == #shamt and masked bits of B are known zero.
14639 SDValue ShAmt = N00.getOperand(1);
14640 unsigned ShAmtC = ShAmt->getAsZExtVal();
14641 unsigned LSB = llvm::countr_zero(Mask);
14642 if (ShAmtC != LSB)
14643 return SDValue();
14644
14645 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14646 DAG.getConstant(~Mask, DL, MVT::i32));
14647
14648 DCI.CombineTo(N, Res, false);
14649 // Return value from the original node to inform the combiner that N is
14650 // now dead.
14651 return SDValue(N, 0);
14652 }
14653
14654 return SDValue();
14655}
14656
14657static bool isValidMVECond(unsigned CC, bool IsFloat) {
14658 switch (CC) {
14659 case ARMCC::EQ:
14660 case ARMCC::NE:
14661 case ARMCC::LE:
14662 case ARMCC::GT:
14663 case ARMCC::GE:
14664 case ARMCC::LT:
14665 return true;
14666 case ARMCC::HS:
14667 case ARMCC::HI:
14668 return !IsFloat;
14669 default:
14670 return false;
14671 };
14672}
14673
14674static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14675 if (N->getOpcode() == ARMISD::VCMP)
14676 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14677 else if (N->getOpcode() == ARMISD::VCMPZ)
14678 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14679 else
14680 llvm_unreachable("Not a VCMP/VCMPZ!");
14681}
14682
14683static bool CanInvertMVEVCMP(SDValue N) {
14684 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14685 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14686}
14687
14688static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14689 const ARMSubtarget *Subtarget) {
14690 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14691 // together with predicates
14692 EVT VT = N->getValueType(0);
14693 SDLoc DL(N);
14694 SDValue N0 = N->getOperand(0);
14695 SDValue N1 = N->getOperand(1);
14696
14697 auto IsFreelyInvertable = [&](SDValue V) {
14698 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14699 return CanInvertMVEVCMP(V);
14700 return false;
14701 };
14702
14703 // At least one operand must be freely invertable.
14704 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14705 return SDValue();
14706
14707 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14708 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14709 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14710 return DAG.getLogicalNOT(DL, And, VT);
14711}
14712
14713/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14714 static SDValue PerformORCombine(SDNode *N,
14715 TargetLowering::DAGCombinerInfo &DCI,
14716 const ARMSubtarget *Subtarget) {
14717 // Attempt to use immediate-form VORR
14718 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14719 SDLoc dl(N);
14720 EVT VT = N->getValueType(0);
14721 SelectionDAG &DAG = DCI.DAG;
14722
14723 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14724 return SDValue();
14725
14726 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14727 VT == MVT::v8i1 || VT == MVT::v16i1))
14728 return PerformORCombine_i1(N, DAG, Subtarget);
14729
14730 APInt SplatBits, SplatUndef;
14731 unsigned SplatBitSize;
14732 bool HasAnyUndefs;
14733 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14734 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14735 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14736 SplatBitSize == 64) {
14737 EVT VorrVT;
14738 SDValue Val =
14739 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14740 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14741 if (Val.getNode()) {
14742 SDValue Input =
14743 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14744 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14745 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14746 }
14747 }
14748 }
14749
14750 if (!Subtarget->isThumb1Only()) {
14751 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14752 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14753 return Result;
14754 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14755 return Result;
14756 }
14757
14758 SDValue N0 = N->getOperand(0);
14759 SDValue N1 = N->getOperand(1);
14760
14761 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
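// For example, with a constant splat A = <0x0000ffff, ...>, the pattern keeps
// the low half of each lane from B and the high half from C, which is exactly
// (A & B) | (~A & C), i.e. what VBSP/VBSL computes with A as the select mask.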
14762 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14763 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
14764
14765 // The code below optimizes (or (and X, Y), Z).
14766 // The AND operand needs to have a single user to make these optimizations
14767 // profitable.
14768 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14769 return SDValue();
14770
14771 APInt SplatUndef;
14772 unsigned SplatBitSize;
14773 bool HasAnyUndefs;
14774
14775 APInt SplatBits0, SplatBits1;
14776 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14777 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14778 // Ensure that the second operand of both ANDs is a constant splat.
14779 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14780 HasAnyUndefs) && !HasAnyUndefs) {
14781 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14782 HasAnyUndefs) && !HasAnyUndefs) {
14783 // Ensure that the bit widths of the constants are the same and that
14784 // the splat arguments are logical inverses as per the pattern we
14785 // are trying to simplify.
14786 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14787 SplatBits0 == ~SplatBits1) {
14788 // Canonicalize the vector type to make instruction selection
14789 // simpler.
14790 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14791 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14792 N0->getOperand(1),
14793 N0->getOperand(0),
14794 N1->getOperand(0));
14795 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14796 }
14797 }
14798 }
14799 }
14800
14801 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14802 // reasonable.
14803 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14804 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14805 return Res;
14806 }
14807
14808 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14809 return Result;
14810
14811 return SDValue();
14812}
14813
14814 static SDValue PerformXORCombine(SDNode *N,
14815 TargetLowering::DAGCombinerInfo &DCI,
14816 const ARMSubtarget *Subtarget) {
14817 EVT VT = N->getValueType(0);
14818 SelectionDAG &DAG = DCI.DAG;
14819
14820 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14821 return SDValue();
14822
14823 if (!Subtarget->isThumb1Only()) {
14824 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14825 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14826 return Result;
14827
14828 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14829 return Result;
14830 }
14831
14832 if (Subtarget->hasMVEIntegerOps()) {
14833 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14834 SDValue N0 = N->getOperand(0);
14835 SDValue N1 = N->getOperand(1);
14836 const TargetLowering *TLI = Subtarget->getTargetLowering();
14837 if (TLI->isConstTrueVal(N1) &&
14838 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14839 if (CanInvertMVEVCMP(N0)) {
14840 SDLoc DL(N0);
14841 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14842
14843 SmallVector<SDValue, 4> Ops;
14844 Ops.push_back(N0->getOperand(0));
14845 if (N0->getOpcode() == ARMISD::VCMP)
14846 Ops.push_back(N0->getOperand(1));
14847 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14848 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14849 }
14850 }
14851 }
14852
14853 return SDValue();
14854}
14855
14856// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14857// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14858// their position in "to" (Rd).
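// For example, for (ARMISD::BFI Rd, Rn, 0xffff00ff) the inverted mask operand
// gives ToMask = 0x0000ff00 (8 destination bits) and FromMask = 0x000000ff
// (the low 8 bits of Rn); if Rn is itself (srl X, 8), FromMask is shifted up
// to 0x0000ff00 and X becomes the reported "from" value.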
14859static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14860 assert(N->getOpcode() == ARMISD::BFI);
14861
14862 SDValue From = N->getOperand(1);
14863 ToMask = ~N->getConstantOperandAPInt(2);
14864 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14865
14866 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14867 // #C in the base of the SHR.
14868 if (From->getOpcode() == ISD::SRL &&
14869 isa<ConstantSDNode>(From->getOperand(1))) {
14870 APInt Shift = From->getConstantOperandAPInt(1);
14871 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14872 FromMask <<= Shift.getLimitedValue(31);
14873 From = From->getOperand(0);
14874 }
14875
14876 return From;
14877}
14878
14879// If A and B contain one contiguous set of bits, does A | B == A . B?
14880//
14881 // Neither A nor B may be zero.
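// The check below asks whether the lowest set bit of A sits exactly one
// position above the highest set bit of B: e.g. A = 0b00111000 and
// B = 0b00000111 concatenate properly, while A = 0b00110000 and
// B = 0b00000011 leave a gap and do not.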
14882static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14883 unsigned LastActiveBitInA = A.countr_zero();
14884 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14885 return LastActiveBitInA - 1 == FirstActiveBitInB;
14886}
14887
14888 static SDValue FindBFIToCombineWith(SDNode *N) {
14889 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14890 APInt ToMask, FromMask;
14891 SDValue From = ParseBFI(N, ToMask, FromMask);
14892 SDValue To = N->getOperand(0);
14893
14894 SDValue V = To;
14895 if (V.getOpcode() != ARMISD::BFI)
14896 return SDValue();
14897
14898 APInt NewToMask, NewFromMask;
14899 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14900 if (NewFrom != From)
14901 return SDValue();
14902
14903 // Do the written bits conflict with any we've seen so far?
14904 if ((NewToMask & ToMask).getBoolValue())
14905 // Conflicting bits.
14906 return SDValue();
14907
14908 // Are the new bits contiguous when combined with the old bits?
14909 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14910 BitsProperlyConcatenate(FromMask, NewFromMask))
14911 return V;
14912 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14913 BitsProperlyConcatenate(NewFromMask, FromMask))
14914 return V;
14915
14916 return SDValue();
14917}
14918
14919 static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
14920 SDValue N0 = N->getOperand(0);
14921 SDValue N1 = N->getOperand(1);
14922
14923 if (N1.getOpcode() == ISD::AND) {
14924 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14925 // the bits being cleared by the AND are not demanded by the BFI.
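// For example, with an inverted mask of 0xffff00ff the BFI inserts 8 bits
// taken from the low end of its source, so an (and B, 0x000000ff) feeding
// that source clears nothing the BFI reads and can be dropped.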
14926 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14927 if (!N11C)
14928 return SDValue();
14929 unsigned InvMask = N->getConstantOperandVal(2);
14930 unsigned LSB = llvm::countr_zero(~InvMask);
14931 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14932 assert(Width <
14933 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14934 "undefined behavior");
14935 unsigned Mask = (1u << Width) - 1;
14936 unsigned Mask2 = N11C->getZExtValue();
14937 if ((Mask & (~Mask2)) == 0)
14938 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14939 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14940 return SDValue();
14941 }
14942
14943 // Look for another BFI to combine with.
14944 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14945 // We've found a BFI.
14946 APInt ToMask1, FromMask1;
14947 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14948
14949 APInt ToMask2, FromMask2;
14950 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14951 assert(From1 == From2);
14952 (void)From2;
14953
14954 // Create a new BFI, combining the two together.
14955 APInt NewFromMask = FromMask1 | FromMask2;
14956 APInt NewToMask = ToMask1 | ToMask2;
14957
14958 EVT VT = N->getValueType(0);
14959 SDLoc dl(N);
14960
14961 if (NewFromMask[0] == 0)
14962 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
14963 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
14964 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14965 DAG.getConstant(~NewToMask, dl, VT));
14966 }
14967
14968 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14969 // that lower bit insertions are performed first, provided that M1 and M2
14970 // do not overlap. This can allow multiple BFI instructions to be combined
14971 // together by the other folds above.
14972 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14973 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14974 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14975
14976 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14977 ToMask1.countl_zero() < ToMask2.countl_zero())
14978 return SDValue();
14979
14980 EVT VT = N->getValueType(0);
14981 SDLoc dl(N);
14982 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14983 N->getOperand(1), N->getOperand(2));
14984 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14985 N0.getOperand(2));
14986 }
14987
14988 return SDValue();
14989}
14990
14991// Check that N is CMPZ(CSINC(0, 0, CC, X)),
14992// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
14993// return X if valid.
14994 static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
14995 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14996 return SDValue();
14997 SDValue CSInc = Cmp->getOperand(0);
14998
14999 // Ignore any `And 1` nodes that may not yet have been removed. We are
15000 // looking for a value that produces 1/0, so these have no effect on the
15001 // code.
15002 while (CSInc.getOpcode() == ISD::AND &&
15003 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
15004 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
15005 CSInc = CSInc.getOperand(0);
15006
15007 if (CSInc.getOpcode() == ARMISD::CSINC &&
15008 isNullConstant(CSInc.getOperand(0)) &&
15009 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15010 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
15011 return CSInc.getOperand(3);
15012 }
15013 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
15014 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15015 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
15016 return CSInc.getOperand(4);
15017 }
15018 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
15019 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15020 CC = ARMCC::getOppositeCondition(
15021 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
15022 return CSInc.getOperand(4);
15023 }
15024 return SDValue();
15025}
15026
15027 static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
15028 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
15029 // t92: glue = ARMISD::CMPZ t74, 0
15030 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15031 // t96: glue = ARMISD::CMPZ t93, 0
15032 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15033 ARMCC::CondCodes Cond;
15034 if (SDValue C = IsCMPZCSINC(N, Cond))
15035 if (Cond == ARMCC::EQ)
15036 return C;
15037 return SDValue();
15038}
15039
15040 static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
15041 // Fold away an unnecessary CMPZ/CSINC
15042 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15043 // if C1==EQ -> CSXYZ A, B, C2, D
15044 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15045 ARMCC::CondCodes Cond;
15046 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15047 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15048 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15049 N->getOperand(1),
15050 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15051 if (N->getConstantOperandVal(2) == ARMCC::NE)
15052 return DAG.getNode(
15053 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15054 N->getOperand(1),
15055 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
15056 }
15057 return SDValue();
15058}
15059
15060/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15061/// ARMISD::VMOVRRD.
15062 static SDValue PerformVMOVRRDCombine(SDNode *N,
15063 TargetLowering::DAGCombinerInfo &DCI,
15064 const ARMSubtarget *Subtarget) {
15065 // vmovrrd(vmovdrr x, y) -> x,y
15066 SDValue InDouble = N->getOperand(0);
15067 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15068 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15069
15070 // vmovrrd(load f64) -> (load i32), (load i32)
15071 SDNode *InNode = InDouble.getNode();
15072 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15073 InNode->getValueType(0) == MVT::f64 &&
15074 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15075 !cast<LoadSDNode>(InNode)->isVolatile()) {
15076 // TODO: Should this be done for non-FrameIndex operands?
15077 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15078
15079 SelectionDAG &DAG = DCI.DAG;
15080 SDLoc DL(LD);
15081 SDValue BasePtr = LD->getBasePtr();
15082 SDValue NewLD1 =
15083 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15084 LD->getAlign(), LD->getMemOperand()->getFlags());
15085
15086 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15087 DAG.getConstant(4, DL, MVT::i32));
15088
15089 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15090 LD->getPointerInfo().getWithOffset(4),
15091 commonAlignment(LD->getAlign(), 4),
15092 LD->getMemOperand()->getFlags());
15093
15094 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15095 if (DCI.DAG.getDataLayout().isBigEndian())
15096 std::swap (NewLD1, NewLD2);
15097 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15098 return Result;
15099 }
15100
15101 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15102 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15103 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15104 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15105 SDValue BV = InDouble.getOperand(0);
15106 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15107 // change lane order under big endian.
15108 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15109 while (
15110 (BV.getOpcode() == ISD::BITCAST ||
15111 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15112 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15113 BVSwap = BV.getOpcode() == ISD::BITCAST;
15114 BV = BV.getOperand(0);
15115 }
15116 if (BV.getValueType() != MVT::v4i32)
15117 return SDValue();
15118
15119 // Handle buildvectors, pulling out the correct lane depending on
15120 // endianness.
15121 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15122 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15123 SDValue Op0 = BV.getOperand(Offset);
15124 SDValue Op1 = BV.getOperand(Offset + 1);
15125 if (!Subtarget->isLittle() && BVSwap)
15126 std::swap(Op0, Op1);
15127
15128 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15129 }
15130
15131 // A chain of insert_vectors, grabbing the correct value of the chain of
15132 // inserts.
15133 SDValue Op0, Op1;
15134 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15135 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15136 if (BV.getConstantOperandVal(2) == Offset)
15137 Op0 = BV.getOperand(1);
15138 if (BV.getConstantOperandVal(2) == Offset + 1)
15139 Op1 = BV.getOperand(1);
15140 }
15141 BV = BV.getOperand(0);
15142 }
15143 if (!Subtarget->isLittle() && BVSwap)
15144 std::swap(Op0, Op1);
15145 if (Op0 && Op1)
15146 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15147 }
15148
15149 return SDValue();
15150}
15151
15152/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15153/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15154 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
15155 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15156 SDValue Op0 = N->getOperand(0);
15157 SDValue Op1 = N->getOperand(1);
15158 if (Op0.getOpcode() == ISD::BITCAST)
15159 Op0 = Op0.getOperand(0);
15160 if (Op1.getOpcode() == ISD::BITCAST)
15161 Op1 = Op1.getOperand(0);
15162 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15163 Op0.getNode() == Op1.getNode() &&
15164 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15165 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15166 N->getValueType(0), Op0.getOperand(0));
15167 return SDValue();
15168}
15169
15170 static SDValue PerformVMOVhrCombine(SDNode *N,
15171 TargetLowering::DAGCombinerInfo &DCI) {
15172 SDValue Op0 = N->getOperand(0);
15173
15174 // VMOVhr (VMOVrh (X)) -> X
15175 if (Op0->getOpcode() == ARMISD::VMOVrh)
15176 return Op0->getOperand(0);
15177
15178 // FullFP16: half values are passed in S-registers, and we don't
15179 // need any of the bitcast and moves:
15180 //
15181 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15182 // t5: i32 = bitcast t2
15183 // t18: f16 = ARMISD::VMOVhr t5
15184 // =>
15185 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15186 if (Op0->getOpcode() == ISD::BITCAST) {
15187 SDValue Copy = Op0->getOperand(0);
15188 if (Copy.getValueType() == MVT::f32 &&
15189 Copy->getOpcode() == ISD::CopyFromReg) {
15190 bool HasGlue = Copy->getNumOperands() == 3;
15191 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15192 HasGlue ? Copy->getOperand(2) : SDValue()};
15193 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15194 SDValue NewCopy =
15195 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(Copy),
15196 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15197 ArrayRef(Ops, HasGlue ? 3 : 2));
15198
15199 // Update Users, Chains, and Potential Glue.
15200 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15201 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15202 if (HasGlue)
15203 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15204 NewCopy.getValue(2));
15205
15206 return NewCopy;
15207 }
15208 }
15209
15210 // fold (VMOVhr (load x)) -> (load (f16*)x)
15211 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15212 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15213 LN0->getMemoryVT() == MVT::i16) {
15214 SDValue Load =
15215 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15216 LN0->getBasePtr(), LN0->getMemOperand());
15217 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15218 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15219 return Load;
15220 }
15221 }
15222
15223 // Only the bottom 16 bits of the source register are used.
15224 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15225 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15226 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15227 return SDValue(N, 0);
15228
15229 return SDValue();
15230}
15231
15232 static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {
15233 SDValue N0 = N->getOperand(0);
15234 EVT VT = N->getValueType(0);
15235
15236 // fold (VMOVrh (fpconst x)) -> const x
15237 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15238 APFloat V = C->getValueAPF();
15239 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15240 }
15241
15242 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15243 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15244 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15245
15246 SDValue Load =
15247 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15248 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15249 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15250 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15251 return Load;
15252 }
15253
15254 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15255 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15256 isa<ConstantSDNode>(N0->getOperand(1)))
15257 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15258 N0->getOperand(1));
15259
15260 return SDValue();
15261}
15262
15263/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15264/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15265/// i64 vector to have f64 elements, since the value can then be loaded
15266/// directly into a VFP register.
15267 static bool hasNormalLoadOperand(SDNode *N) {
15268 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15269 for (unsigned i = 0; i < NumElts; ++i) {
15270 SDNode *Elt = N->getOperand(i).getNode();
15271 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15272 return true;
15273 }
15274 return false;
15275}
15276
15277/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15278/// ISD::BUILD_VECTOR.
15279 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15280 TargetLowering::DAGCombinerInfo &DCI,
15281 const ARMSubtarget *Subtarget) {
15282 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15283 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15284 // into a pair of GPRs, which is fine when the value is used as a scalar,
15285 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15286 SelectionDAG &DAG = DCI.DAG;
15287 if (N->getNumOperands() == 2)
15288 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15289 return RV;
15290
15291 // Load i64 elements as f64 values so that type legalization does not split
15292 // them up into i32 values.
15293 EVT VT = N->getValueType(0);
15294 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15295 return SDValue();
15296 SDLoc dl(N);
15297 SmallVector<SDValue, 8> Ops;
15298 unsigned NumElts = VT.getVectorNumElements();
15299 for (unsigned i = 0; i < NumElts; ++i) {
15300 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15301 Ops.push_back(V);
15302 // Make the DAGCombiner fold the bitcast.
15303 DCI.AddToWorklist(V.getNode());
15304 }
15305 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15306 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15307 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15308}
15309
15310/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15311static SDValue
15312 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15313 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15314 // At that time, we may have inserted bitcasts from integer to float.
15315 // If these bitcasts have survived DAGCombine, change the lowering of this
15316 // BUILD_VECTOR in something more vector friendly, i.e., that does not
15317 // force to use floating point types.
15318
15319 // Make sure we can change the type of the vector.
15320 // This is possible iff:
15321 // 1. The vector is only used in a bitcast to a integer type. I.e.,
15322 // 1.1. Vector is used only once.
15323 // 1.2. Use is a bit convert to an integer type.
15324 // 2. The size of its operands are 32-bits (64-bits are not legal).
15325 EVT VT = N->getValueType(0);
15326 EVT EltVT = VT.getVectorElementType();
15327
15328 // Check 1.1. and 2.
15329 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15330 return SDValue();
15331
15332 // By construction, the input type must be float.
15333 assert(EltVT == MVT::f32 && "Unexpected type!");
15334
15335 // Check 1.2.
15336 SDNode *Use = *N->use_begin();
15337 if (Use->getOpcode() != ISD::BITCAST ||
15338 Use->getValueType(0).isFloatingPoint())
15339 return SDValue();
15340
15341 // Check profitability.
15342 // Model is, if more than half of the relevant operands are bitcast from
15343 // i32, turn the build_vector into a sequence of insert_vector_elt.
15344 // Relevant operands are everything that is not statically
15345 // (i.e., at compile time) bitcasted.
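// For example, for a 4-element build_vector where one operand is a constant
// and the remaining three are bitcasts from i32, there are 3 relevant
// operands and 3 bitcast ones, so 3 > 3/2 and the node is rebuilt as i32
// insert_vector_elts.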
15346 unsigned NumOfBitCastedElts = 0;
15347 unsigned NumElts = VT.getVectorNumElements();
15348 unsigned NumOfRelevantElts = NumElts;
15349 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15350 SDValue Elt = N->getOperand(Idx);
15351 if (Elt->getOpcode() == ISD::BITCAST) {
15352 // Assume only bit cast to i32 will go away.
15353 if (Elt->getOperand(0).getValueType() == MVT::i32)
15354 ++NumOfBitCastedElts;
15355 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15356 // Constants are statically casted, thus do not count them as
15357 // relevant operands.
15358 --NumOfRelevantElts;
15359 }
15360
15361 // Check if more than half of the elements require a non-free bitcast.
15362 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15363 return SDValue();
15364
15365 SelectionDAG &DAG = DCI.DAG;
15366 // Create the new vector type.
15367 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15368 // Check if the type is legal.
15369 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15370 if (!TLI.isTypeLegal(VecVT))
15371 return SDValue();
15372
15373 // Combine:
15374 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15375 // => BITCAST INSERT_VECTOR_ELT
15376 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15377 // (BITCAST EN), N.
15378 SDValue Vec = DAG.getUNDEF(VecVT);
15379 SDLoc dl(N);
15380 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15381 SDValue V = N->getOperand(Idx);
15382 if (V.isUndef())
15383 continue;
15384 if (V.getOpcode() == ISD::BITCAST &&
15385 V->getOperand(0).getValueType() == MVT::i32)
15386 // Fold obvious case.
15387 V = V.getOperand(0);
15388 else {
15389 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15390 // Make the DAGCombiner fold the bitcasts.
15391 DCI.AddToWorklist(V.getNode());
15392 }
15393 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15394 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15395 }
15396 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15397 // Make the DAGCombiner fold the bitcasts.
15398 DCI.AddToWorklist(Vec.getNode());
15399 return Vec;
15400}
15401
15402static SDValue
15403 PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15404 EVT VT = N->getValueType(0);
15405 SDValue Op = N->getOperand(0);
15406 SDLoc dl(N);
15407
15408 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15409 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15410 // If the valuetypes are the same, we can remove the cast entirely.
15411 if (Op->getOperand(0).getValueType() == VT)
15412 return Op->getOperand(0);
15413 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15414 }
15415
15416 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15417 // more VPNOT which might get folded as else predicates.
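// MVE predicates only occupy the bottom 16 bits of the i32 they travel in,
// which is why the all-ones value is rebuilt below as a PREDICATE_CAST of the
// constant 0xffff rather than of -1.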
15418 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15419 SDValue X =
15420 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15421 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15422 DCI.DAG.getConstant(65535, dl, MVT::i32));
15423 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15424 }
15425
15426 // Only the bottom 16 bits of the source register are used.
15427 if (Op.getValueType() == MVT::i32) {
15428 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15429 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15430 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15431 return SDValue(N, 0);
15432 }
15433 return SDValue();
15434}
15435
15436 static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
15437 const ARMSubtarget *ST) {
15438 EVT VT = N->getValueType(0);
15439 SDValue Op = N->getOperand(0);
15440 SDLoc dl(N);
15441
15442 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15443 if (ST->isLittle())
15444 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15445
15446 // VECTOR_REG_CAST undef -> undef
15447 if (Op.isUndef())
15448 return DAG.getUNDEF(VT);
15449
15450 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15451 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15452 // If the valuetypes are the same, we can remove the cast entirely.
15453 if (Op->getOperand(0).getValueType() == VT)
15454 return Op->getOperand(0);
15455 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15456 }
15457
15458 return SDValue();
15459}
15460
15461 static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
15462 const ARMSubtarget *Subtarget) {
15463 if (!Subtarget->hasMVEIntegerOps())
15464 return SDValue();
15465
15466 EVT VT = N->getValueType(0);
15467 SDValue Op0 = N->getOperand(0);
15468 SDValue Op1 = N->getOperand(1);
15469 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15470 SDLoc dl(N);
15471
15472 // vcmp X, 0, cc -> vcmpz X, cc
15473 if (isZeroVector(Op1))
15474 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15475
15476 unsigned SwappedCond = getSwappedCondition(Cond);
15477 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15478 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15479 if (isZeroVector(Op0))
15480 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15481 DAG.getConstant(SwappedCond, dl, MVT::i32));
15482 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15483 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15484 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15485 DAG.getConstant(SwappedCond, dl, MVT::i32));
15486 }
15487
15488 return SDValue();
15489}
15490
15491/// PerformInsertEltCombine - Target-specific dag combine xforms for
15492/// ISD::INSERT_VECTOR_ELT.
15493 static SDValue PerformInsertEltCombine(SDNode *N,
15494 TargetLowering::DAGCombinerInfo &DCI) {
15495 // Bitcast an i64 load inserted into a vector to f64.
15496 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15497 EVT VT = N->getValueType(0);
15498 SDNode *Elt = N->getOperand(1).getNode();
15499 if (VT.getVectorElementType() != MVT::i64 ||
15500 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15501 return SDValue();
15502
15503 SelectionDAG &DAG = DCI.DAG;
15504 SDLoc dl(N);
15505 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15506 VT.getVectorNumElements());
15507 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15508 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15509 // Make the DAGCombiner fold the bitcasts.
15510 DCI.AddToWorklist(Vec.getNode());
15511 DCI.AddToWorklist(V.getNode());
15512 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15513 Vec, V, N->getOperand(2));
15514 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15515}
15516
15517// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15518// directly or bitcast to an integer if the original is a float vector.
15519// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15520// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
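// For example, lanes 2 and 3 of a v4i32 (or bitcast v4f32) source become the
// two i32 results of VMOVRRD(extract_elt (vector_reg_cast v2f64 x), 1).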
15521static SDValue
15522 PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15523 EVT VT = N->getValueType(0);
15524 SDLoc dl(N);
15525
15526 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15527 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15528 return SDValue();
15529
15530 SDValue Ext = SDValue(N, 0);
15531 if (Ext.getOpcode() == ISD::BITCAST &&
15532 Ext.getOperand(0).getValueType() == MVT::f32)
15533 Ext = Ext.getOperand(0);
15534 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15535 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15536 Ext.getConstantOperandVal(1) % 2 != 0)
15537 return SDValue();
15538 if (Ext->use_size() == 1 &&
15539 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15540 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15541 return SDValue();
15542
15543 SDValue Op0 = Ext.getOperand(0);
15544 EVT VecVT = Op0.getValueType();
15545 unsigned ResNo = Op0.getResNo();
15546 unsigned Lane = Ext.getConstantOperandVal(1);
15547 if (VecVT.getVectorNumElements() != 4)
15548 return SDValue();
15549
15550 // Find another extract, of Lane + 1
15551 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15552 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15553 isa<ConstantSDNode>(V->getOperand(1)) &&
15554 V->getConstantOperandVal(1) == Lane + 1 &&
15555 V->getOperand(0).getResNo() == ResNo;
15556 });
15557 if (OtherIt == Op0->uses().end())
15558 return SDValue();
15559
15560 // For float extracts, we need to be converting to a i32 for both vector
15561 // lanes.
15562 SDValue OtherExt(*OtherIt, 0);
15563 if (OtherExt.getValueType() != MVT::i32) {
15564 if (OtherExt->use_size() != 1 ||
15565 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15566 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15567 return SDValue();
15568 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15569 }
15570
15571 // Convert the type to a f64 and extract with a VMOVRRD.
15572 SDValue F64 = DCI.DAG.getNode(
15573 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15574 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15575 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15576 SDValue VMOVRRD =
15577 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15578
15579 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15580 return VMOVRRD;
15581}
15582
15583 static SDValue PerformExtractEltCombine(SDNode *N,
15584 TargetLowering::DAGCombinerInfo &DCI,
15585 const ARMSubtarget *ST) {
15586 SDValue Op0 = N->getOperand(0);
15587 EVT VT = N->getValueType(0);
15588 SDLoc dl(N);
15589
15590 // extract (vdup x) -> x
15591 if (Op0->getOpcode() == ARMISD::VDUP) {
15592 SDValue X = Op0->getOperand(0);
15593 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15594 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15595 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15596 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15597 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15598 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15599
15600 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15601 X = X->getOperand(0);
15602 if (X.getValueType() == VT)
15603 return X;
15604 }
15605
15606 // extract ARM_BUILD_VECTOR -> x
15607 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15608 isa<ConstantSDNode>(N->getOperand(1)) &&
15609 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15610 return Op0.getOperand(N->getConstantOperandVal(1));
15611 }
15612
15613 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15614 if (Op0.getValueType() == MVT::v4i32 &&
15615 isa<ConstantSDNode>(N->getOperand(1)) &&
15616 Op0.getOpcode() == ISD::BITCAST &&
15617 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15618 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15619 SDValue BV = Op0.getOperand(0);
15620 unsigned Offset = N->getConstantOperandVal(1);
15621 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15622 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15623 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15624 }
15625
15626 // extract x, n; extract x, n+1 -> VMOVRRD x
15627 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15628 return R;
15629
15630 // extract (MVETrunc(x)) -> extract x
15631 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15632 unsigned Idx = N->getConstantOperandVal(1);
15633 unsigned Vec =
15634 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15635 unsigned SubIdx =
15636 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15637 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15638 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15639 }
15640
15641 return SDValue();
15642}
15643
15644 static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
15645 SDValue Op = N->getOperand(0);
15646 EVT VT = N->getValueType(0);
15647
15648 // sext_inreg(VGETLANEu) -> VGETLANEs
15649 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15650 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15651 Op.getOperand(0).getValueType().getScalarType())
15652 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15653 Op.getOperand(1));
15654
15655 return SDValue();
15656}
15657
15658static SDValue
15659 PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15660 SDValue Vec = N->getOperand(0);
15661 SDValue SubVec = N->getOperand(1);
15662 uint64_t IdxVal = N->getConstantOperandVal(2);
15663 EVT VecVT = Vec.getValueType();
15664 EVT SubVT = SubVec.getValueType();
15665
15666 // Only do this for legal fixed vector types.
15667 if (!VecVT.isFixedLengthVector() ||
15668 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15669 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15670 return SDValue();
15671
15672 // Ignore widening patterns.
15673 if (IdxVal == 0 && Vec.isUndef())
15674 return SDValue();
15675
15676 // Subvector must be half the width and an "aligned" insertion.
15677 unsigned NumSubElts = SubVT.getVectorNumElements();
15678 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15679 (IdxVal != 0 && IdxVal != NumSubElts))
15680 return SDValue();
15681
15682 // Fold insert_subvector -> concat_vectors
15683 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15684 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15685 SDLoc DL(N);
15686 SDValue Lo, Hi;
15687 if (IdxVal == 0) {
15688 Lo = SubVec;
15689 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15690 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15691 } else {
15692 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15693 DCI.DAG.getVectorIdxConstant(0, DL));
15694 Hi = SubVec;
15695 }
15696 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15697}
15698
15699// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15700 static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15701 SelectionDAG &DAG) {
15702 SDValue Trunc = N->getOperand(0);
15703 EVT VT = Trunc.getValueType();
15704 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15705 return SDValue();
15706
15707 SDLoc DL(Trunc);
15708 if (isVMOVNTruncMask(N->getMask(), VT, false))
15709 return DAG.getNode(
15710 ARMISD::VMOVN, DL, VT,
15711 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15712 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15713 DAG.getConstant(1, DL, MVT::i32));
15714 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15715 return DAG.getNode(
15716 ARMISD::VMOVN, DL, VT,
15717 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15718 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15719 DAG.getConstant(1, DL, MVT::i32));
15720 return SDValue();
15721}
15722
15723/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15724/// ISD::VECTOR_SHUFFLE.
15725 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15726 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15727 return R;
15728
15729 // The LLVM shufflevector instruction does not require the shuffle mask
15730 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15731 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15732 // operands do not match the mask length, they are extended by concatenating
15733 // them with undef vectors. That is probably the right thing for other
15734 // targets, but for NEON it is better to concatenate two double-register
15735 // size vector operands into a single quad-register size vector. Do that
15736 // transformation here:
15737 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15738 // shuffle(concat(v1, v2), undef)
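// For example, for v4i32 with mask <0, 1, 4, 5>, elements 4 and 5 select the
// low half of the second concat and are remapped to 2 and 3 in the single
// concatenated operand.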
15739 SDValue Op0 = N->getOperand(0);
15740 SDValue Op1 = N->getOperand(1);
15741 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15742 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15743 Op0.getNumOperands() != 2 ||
15744 Op1.getNumOperands() != 2)
15745 return SDValue();
15746 SDValue Concat0Op1 = Op0.getOperand(1);
15747 SDValue Concat1Op1 = Op1.getOperand(1);
15748 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15749 return SDValue();
15750 // Skip the transformation if any of the types are illegal.
15751 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15752 EVT VT = N->getValueType(0);
15753 if (!TLI.isTypeLegal(VT) ||
15754 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15755 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15756 return SDValue();
15757
15758 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15759 Op0.getOperand(0), Op1.getOperand(0));
15760 // Translate the shuffle mask.
15761 SmallVector<int, 16> NewMask;
15762 unsigned NumElts = VT.getVectorNumElements();
15763 unsigned HalfElts = NumElts/2;
15764 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15765 for (unsigned n = 0; n < NumElts; ++n) {
15766 int MaskElt = SVN->getMaskElt(n);
15767 int NewElt = -1;
15768 if (MaskElt < (int)HalfElts)
15769 NewElt = MaskElt;
15770 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15771 NewElt = HalfElts + MaskElt - NumElts;
15772 NewMask.push_back(NewElt);
15773 }
15774 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15775 DAG.getUNDEF(VT), NewMask);
15776}
15777
15778/// Load/store instruction that can be merged with a base address
15779/// update
15780 struct BaseUpdateTarget {
15781 SDNode *N;
15782 bool isIntrinsic;
15783 bool isStore;
15784 unsigned AddrOpIdx;
15785};
15786
15787 struct BaseUpdateUser {
15788 /// Instruction that updates a pointer
15789 SDNode *N;
15790 /// Pointer increment operand
15791 SDValue Inc;
15792 /// Pointer increment value if it is a constant, or 0 otherwise
15793 unsigned ConstInc;
15794};
15795
15796 static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15797 struct BaseUpdateUser &User,
15798 bool SimpleConstIncOnly,
15799 TargetLowering::DAGCombinerInfo &DCI) {
15800 SelectionDAG &DAG = DCI.DAG;
15801 SDNode *N = Target.N;
15802 MemSDNode *MemN = cast<MemSDNode>(N);
15803 SDLoc dl(N);
15804
15805 // Find the new opcode for the updating load/store.
15806 bool isLoadOp = true;
15807 bool isLaneOp = false;
15808 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15809 // as an operand.
15810 bool hasAlignment = true;
15811 unsigned NewOpc = 0;
15812 unsigned NumVecs = 0;
15813 if (Target.isIntrinsic) {
15814 unsigned IntNo = N->getConstantOperandVal(1);
15815 switch (IntNo) {
15816 default:
15817 llvm_unreachable("unexpected intrinsic for Neon base update");
15818 case Intrinsic::arm_neon_vld1:
15819 NewOpc = ARMISD::VLD1_UPD;
15820 NumVecs = 1;
15821 break;
15822 case Intrinsic::arm_neon_vld2:
15823 NewOpc = ARMISD::VLD2_UPD;
15824 NumVecs = 2;
15825 break;
15826 case Intrinsic::arm_neon_vld3:
15827 NewOpc = ARMISD::VLD3_UPD;
15828 NumVecs = 3;
15829 break;
15830 case Intrinsic::arm_neon_vld4:
15831 NewOpc = ARMISD::VLD4_UPD;
15832 NumVecs = 4;
15833 break;
15834 case Intrinsic::arm_neon_vld1x2:
15835 NewOpc = ARMISD::VLD1x2_UPD;
15836 NumVecs = 2;
15837 hasAlignment = false;
15838 break;
15839 case Intrinsic::arm_neon_vld1x3:
15840 NewOpc = ARMISD::VLD1x3_UPD;
15841 NumVecs = 3;
15842 hasAlignment = false;
15843 break;
15844 case Intrinsic::arm_neon_vld1x4:
15845 NewOpc = ARMISD::VLD1x4_UPD;
15846 NumVecs = 4;
15847 hasAlignment = false;
15848 break;
15849 case Intrinsic::arm_neon_vld2dup:
15850 NewOpc = ARMISD::VLD2DUP_UPD;
15851 NumVecs = 2;
15852 break;
15853 case Intrinsic::arm_neon_vld3dup:
15854 NewOpc = ARMISD::VLD3DUP_UPD;
15855 NumVecs = 3;
15856 break;
15857 case Intrinsic::arm_neon_vld4dup:
15858 NewOpc = ARMISD::VLD4DUP_UPD;
15859 NumVecs = 4;
15860 break;
15861 case Intrinsic::arm_neon_vld2lane:
15862 NewOpc = ARMISD::VLD2LN_UPD;
15863 NumVecs = 2;
15864 isLaneOp = true;
15865 break;
15866 case Intrinsic::arm_neon_vld3lane:
15867 NewOpc = ARMISD::VLD3LN_UPD;
15868 NumVecs = 3;
15869 isLaneOp = true;
15870 break;
15871 case Intrinsic::arm_neon_vld4lane:
15872 NewOpc = ARMISD::VLD4LN_UPD;
15873 NumVecs = 4;
15874 isLaneOp = true;
15875 break;
15876 case Intrinsic::arm_neon_vst1:
15877 NewOpc = ARMISD::VST1_UPD;
15878 NumVecs = 1;
15879 isLoadOp = false;
15880 break;
15881 case Intrinsic::arm_neon_vst2:
15882 NewOpc = ARMISD::VST2_UPD;
15883 NumVecs = 2;
15884 isLoadOp = false;
15885 break;
15886 case Intrinsic::arm_neon_vst3:
15887 NewOpc = ARMISD::VST3_UPD;
15888 NumVecs = 3;
15889 isLoadOp = false;
15890 break;
15891 case Intrinsic::arm_neon_vst4:
15892 NewOpc = ARMISD::VST4_UPD;
15893 NumVecs = 4;
15894 isLoadOp = false;
15895 break;
15896 case Intrinsic::arm_neon_vst2lane:
15897 NewOpc = ARMISD::VST2LN_UPD;
15898 NumVecs = 2;
15899 isLoadOp = false;
15900 isLaneOp = true;
15901 break;
15902 case Intrinsic::arm_neon_vst3lane:
15903 NewOpc = ARMISD::VST3LN_UPD;
15904 NumVecs = 3;
15905 isLoadOp = false;
15906 isLaneOp = true;
15907 break;
15908 case Intrinsic::arm_neon_vst4lane:
15909 NewOpc = ARMISD::VST4LN_UPD;
15910 NumVecs = 4;
15911 isLoadOp = false;
15912 isLaneOp = true;
15913 break;
15914 case Intrinsic::arm_neon_vst1x2:
15915 NewOpc = ARMISD::VST1x2_UPD;
15916 NumVecs = 2;
15917 isLoadOp = false;
15918 hasAlignment = false;
15919 break;
15920 case Intrinsic::arm_neon_vst1x3:
15921 NewOpc = ARMISD::VST1x3_UPD;
15922 NumVecs = 3;
15923 isLoadOp = false;
15924 hasAlignment = false;
15925 break;
15926 case Intrinsic::arm_neon_vst1x4:
15927 NewOpc = ARMISD::VST1x4_UPD;
15928 NumVecs = 4;
15929 isLoadOp = false;
15930 hasAlignment = false;
15931 break;
15932 }
15933 } else {
15934 isLaneOp = true;
15935 switch (N->getOpcode()) {
15936 default:
15937 llvm_unreachable("unexpected opcode for Neon base update");
15938 case ARMISD::VLD1DUP:
15939 NewOpc = ARMISD::VLD1DUP_UPD;
15940 NumVecs = 1;
15941 break;
15942 case ARMISD::VLD2DUP:
15943 NewOpc = ARMISD::VLD2DUP_UPD;
15944 NumVecs = 2;
15945 break;
15946 case ARMISD::VLD3DUP:
15947 NewOpc = ARMISD::VLD3DUP_UPD;
15948 NumVecs = 3;
15949 break;
15950 case ARMISD::VLD4DUP:
15951 NewOpc = ARMISD::VLD4DUP_UPD;
15952 NumVecs = 4;
15953 break;
15954 case ISD::LOAD:
15955 NewOpc = ARMISD::VLD1_UPD;
15956 NumVecs = 1;
15957 isLaneOp = false;
15958 break;
15959 case ISD::STORE:
15960 NewOpc = ARMISD::VST1_UPD;
15961 NumVecs = 1;
15962 isLaneOp = false;
15963 isLoadOp = false;
15964 break;
15965 }
15966 }
15967
15968 // Find the size of memory referenced by the load/store.
15969 EVT VecTy;
15970 if (isLoadOp) {
15971 VecTy = N->getValueType(0);
15972 } else if (Target.isIntrinsic) {
15973 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15974 } else {
15975 assert(Target.isStore &&
15976 "Node has to be a load, a store, or an intrinsic!");
15977 VecTy = N->getOperand(1).getValueType();
15978 }
15979
15980 bool isVLDDUPOp =
15981 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15982 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15983
15984 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15985 if (isLaneOp || isVLDDUPOp)
15986 NumBytes /= VecTy.getVectorNumElements();
15987
15988 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15989 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15990 // separate instructions that make it harder to use a non-constant update.
15991 return false;
15992 }
15993
15994 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15995 return false;
15996
15997 // OK, we found an ADD we can fold into the base update.
15998 // Now, create a _UPD node, taking care of not breaking alignment.
15999
16000 EVT AlignedVecTy = VecTy;
16001 Align Alignment = MemN->getAlign();
16002
16003 // If this is a less-than-standard-aligned load/store, change the type to
16004 // match the standard alignment.
16005 // The alignment is overlooked when selecting _UPD variants; and it's
16006 // easier to introduce bitcasts here than fix that.
16007 // There are 3 ways to get to this base-update combine:
16008 // - intrinsics: they are assumed to be properly aligned (to the standard
16009 // alignment of the memory type), so we don't need to do anything.
16010 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16011 // intrinsics, so, likewise, there's nothing to do.
16012 // - generic load/store instructions: the alignment is specified as an
16013 // explicit operand, rather than implicitly as the standard alignment
16014 // of the memory type (like the intrisics). We need to change the
16015 // memory type to match the explicit alignment. That way, we don't
16016 // generate non-standard-aligned ARMISD::VLDx nodes.
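// For example, an alignment-2 load of v4i32 (16 bytes) is rebuilt below as a
// v8i16 VLD1_UPD, and the loaded value is bitcast back to v4i32 afterwards.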
16017 if (isa<LSBaseSDNode>(N)) {
16018 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
16019 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
16020 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16021 assert(!isLaneOp && "Unexpected generic load/store lane.");
16022 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
16023 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
16024 }
16025 // Don't set an explicit alignment on regular load/stores that we want
16026 // to transform to VLD/VST 1_UPD nodes.
16027 // This matches the behavior of regular load/stores, which only get an
16028 // explicit alignment if the MMO alignment is larger than the standard
16029 // alignment of the memory type.
16030 // Intrinsics, however, always get an explicit alignment, set to the
16031 // alignment of the MMO.
16032 Alignment = Align(1);
16033 }
16034
16035 // Create the new updating load/store node.
16036 // First, create an SDVTList for the new updating node's results.
16037 EVT Tys[6];
16038 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16039 unsigned n;
16040 for (n = 0; n < NumResultVecs; ++n)
16041 Tys[n] = AlignedVecTy;
16042 Tys[n++] = MVT::i32;
16043 Tys[n] = MVT::Other;
16044 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16045
16046 // Then, gather the new node's operands.
16047 SmallVector<SDValue, 8> Ops;
16048 Ops.push_back(N->getOperand(0)); // incoming chain
16049 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16050 Ops.push_back(User.Inc);
16051
16052 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16053 // Try to match the intrinsic's signature
16054 Ops.push_back(StN->getValue());
16055 } else {
16056 // Loads (and of course intrinsics) match the intrinsics' signature,
16057 // so just add all but the alignment operand.
16058 unsigned LastOperand =
16059 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16060 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16061 Ops.push_back(N->getOperand(i));
16062 }
16063
16064 // For all node types, the alignment operand is always the last one.
16065 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16066
16067 // If this is a non-standard-aligned STORE, the penultimate operand is the
16068 // stored value. Bitcast it to the aligned type.
16069 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16070 SDValue &StVal = Ops[Ops.size() - 2];
16071 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16072 }
16073
16074 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16075 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16076 MemN->getMemOperand());
16077
16078 // Update the uses.
16079 SmallVector<SDValue, 5> NewResults;
16080 for (unsigned i = 0; i < NumResultVecs; ++i)
16081 NewResults.push_back(SDValue(UpdN.getNode(), i));
16082
16083 // If this is a non-standard-aligned LOAD, the first result is the loaded
16084 // value. Bitcast it to the expected result type.
16085 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16086 SDValue &LdVal = NewResults[0];
16087 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16088 }
16089
16090 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16091 DCI.CombineTo(N, NewResults);
16092 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16093
16094 return true;
16095}
16096
16097 // If (opcode ptr inc) is an ADD-like instruction, return the
16098// increment value. Otherwise return 0.
16099static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16100 SDValue Inc, const SelectionDAG &DAG) {
16101 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16102 if (!CInc)
16103 return 0;
16104
16105 switch (Opcode) {
16106 case ARMISD::VLD1_UPD:
16107 case ISD::ADD:
16108 return CInc->getZExtValue();
16109 case ISD::OR: {
16110 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16111 // (OR ptr inc) is the same as (ADD ptr inc)
16112 return CInc->getZExtValue();
16113 }
16114 return 0;
16115 }
16116 default:
16117 return 0;
16118 }
16119}
16120
16122 switch (N->getOpcode()) {
16123 case ISD::ADD:
16124 case ISD::OR: {
16125 if (isa<ConstantSDNode>(N->getOperand(1))) {
16126 *Ptr = N->getOperand(0);
16127 *CInc = N->getOperand(1);
16128 return true;
16129 }
16130 return false;
16131 }
16132 case ARMISD::VLD1_UPD: {
16133 if (isa<ConstantSDNode>(N->getOperand(2))) {
16134 *Ptr = N->getOperand(1);
16135 *CInc = N->getOperand(2);
16136 return true;
16137 }
16138 return false;
16139 }
16140 default:
16141 return false;
16142 }
16143}
16144
16145 static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16146 // Check that the add is independent of the load/store.
16147 // Otherwise, folding it would create a cycle. Search through Addr
16148 // as well, since the User may not be a direct user of Addr and
16149 // only share a base pointer.
16150 SmallPtrSet<const SDNode *, 32> Visited;
16151 SmallVector<const SDNode *, 16> Worklist;
16152 Worklist.push_back(N);
16153 Worklist.push_back(User);
16154 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16155 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16156 return false;
16157 return true;
16158}
16159
16160/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16161/// NEON load/store intrinsics, and generic vector load/stores, to merge
16162/// base address updates.
16163/// For generic load/stores, the memory type is assumed to be a vector.
16164/// The caller is assumed to have checked legality.
16165 static SDValue CombineBaseUpdate(SDNode *N,
16166 TargetLowering::DAGCombinerInfo &DCI) {
16167 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16168 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16169 const bool isStore = N->getOpcode() == ISD::STORE;
16170 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16171 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16172
16173 SDValue Addr = N->getOperand(AddrOpIdx);
16174
16176
16177 // Search for a use of the address operand that is an increment.
16178 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16179 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16180 SDNode *User = *UI;
16181 if (UI.getUse().getResNo() != Addr.getResNo() ||
16182 User->getNumOperands() != 2)
16183 continue;
16184
16185 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16186 unsigned ConstInc =
16187 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16188
16189 if (ConstInc || User->getOpcode() == ISD::ADD)
16190 BaseUpdates.push_back({User, Inc, ConstInc});
16191 }
16192
16193 // If the address is a constant pointer increment itself, find
16194 // another constant increment that has the same base operand
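// For example, if Addr is (add Base, 16) and another user of Base computes
// (add Base, 32), that user can still post-increment this access by the
// difference, 16 bytes, from Addr.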
16195 SDValue Base;
16196 SDValue CInc;
16197 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16198 unsigned Offset =
16199 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16200 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16201 UI != UE; ++UI) {
16202
16203 SDNode *User = *UI;
16204 if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
16205 User->getNumOperands() != 2)
16206 continue;
16207
16208 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16209 unsigned UserOffset =
16210 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16211
16212 if (!UserOffset || UserOffset <= Offset)
16213 continue;
16214
16215 unsigned NewConstInc = UserOffset - Offset;
16216 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16217 BaseUpdates.push_back({User, NewInc, NewConstInc});
16218 }
16219 }
16220
16221 // Try to fold the load/store with an update that matches memory
16222 // access size. This should work well for sequential loads.
16223 //
16224 // Filter out invalid updates as well.
16225 unsigned NumValidUpd = BaseUpdates.size();
16226 for (unsigned I = 0; I < NumValidUpd;) {
16227 BaseUpdateUser &User = BaseUpdates[I];
16228 if (!isValidBaseUpdate(N, User.N)) {
16229 --NumValidUpd;
16230 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16231 continue;
16232 }
16233
16234 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16235 return SDValue();
16236 ++I;
16237 }
16238 BaseUpdates.resize(NumValidUpd);
16239
16240 // Try to fold with other users. Non-constant updates are considered
16241 // first, and constant updates are sorted to not break a sequence of
16242 // strided accesses (if there is any).
16243 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16244 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16245 return LHS.ConstInc < RHS.ConstInc;
16246 });
16247 for (BaseUpdateUser &User : BaseUpdates) {
16248 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16249 return SDValue();
16250 }
16251 return SDValue();
16252}
16253
16254 static SDValue PerformVLDCombine(SDNode *N,
16255 TargetLowering::DAGCombinerInfo &DCI) {
16256 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16257 return SDValue();
16258
16259 return CombineBaseUpdate(N, DCI);
16260}
16261
16262 static SDValue PerformMVEVLDCombine(SDNode *N,
16263 TargetLowering::DAGCombinerInfo &DCI) {
16264 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16265 return SDValue();
16266
16267 SelectionDAG &DAG = DCI.DAG;
16268 SDValue Addr = N->getOperand(2);
16269 MemSDNode *MemN = cast<MemSDNode>(N);
16270 SDLoc dl(N);
16271
16272 // For the stores, where there are multiple intrinsics we only actually want
16273 // to post-inc the last of them.
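// arm_mve_vst2q is emitted as two intrinsic calls (stage operand 0 and 1) and
// arm_mve_vst4q as four (stages 0-3); the checks below only rewrite the call
// for the final stage into the post-incrementing form.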
16274 unsigned IntNo = N->getConstantOperandVal(1);
16275 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16276 return SDValue();
16277 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16278 return SDValue();
16279
16280 // Search for a use of the address operand that is an increment.
16281 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16282 UE = Addr.getNode()->use_end();
16283 UI != UE; ++UI) {
16284 SDNode *User = *UI;
16285 if (User->getOpcode() != ISD::ADD ||
16286 UI.getUse().getResNo() != Addr.getResNo())
16287 continue;
16288
16289 // Check that the add is independent of the load/store. Otherwise, folding
16290 // it would create a cycle. We can avoid searching through Addr as it's a
16291 // predecessor to both.
16292 SmallPtrSet<const SDNode *, 32> Visited;
16293 SmallVector<const SDNode *, 16> Worklist;
16294 Visited.insert(Addr.getNode());
16295 Worklist.push_back(N);
16296 Worklist.push_back(User);
16297 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16298 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16299 continue;
16300
16301 // Find the new opcode for the updating load/store.
16302 bool isLoadOp = true;
16303 unsigned NewOpc = 0;
16304 unsigned NumVecs = 0;
16305 switch (IntNo) {
16306 default:
16307 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16308 case Intrinsic::arm_mve_vld2q:
16309 NewOpc = ARMISD::VLD2_UPD;
16310 NumVecs = 2;
16311 break;
16312 case Intrinsic::arm_mve_vld4q:
16313 NewOpc = ARMISD::VLD4_UPD;
16314 NumVecs = 4;
16315 break;
16316 case Intrinsic::arm_mve_vst2q:
16317 NewOpc = ARMISD::VST2_UPD;
16318 NumVecs = 2;
16319 isLoadOp = false;
16320 break;
16321 case Intrinsic::arm_mve_vst4q:
16322 NewOpc = ARMISD::VST4_UPD;
16323 NumVecs = 4;
16324 isLoadOp = false;
16325 break;
16326 }
16327
16328 // Find the size of memory referenced by the load/store.
16329 EVT VecTy;
16330 if (isLoadOp) {
16331 VecTy = N->getValueType(0);
16332 } else {
16333 VecTy = N->getOperand(3).getValueType();
16334 }
16335
16336 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16337
16338 // If the increment is a constant, it must match the memory ref size.
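// For example, an arm_mve_vld2q returning two v4i32 halves references
// 2 * 16 bytes, so only a constant increment of 32 can be folded here.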
16339 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16340 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16341 if (!CInc || CInc->getZExtValue() != NumBytes)
16342 continue;
16343
16344 // Create the new updating load/store node.
16345 // First, create an SDVTList for the new updating node's results.
16346 EVT Tys[6];
16347 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16348 unsigned n;
16349 for (n = 0; n < NumResultVecs; ++n)
16350 Tys[n] = VecTy;
16351 Tys[n++] = MVT::i32;
16352 Tys[n] = MVT::Other;
16353 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16354
16355 // Then, gather the new node's operands.
16356 SmallVector<SDValue, 8> Ops;
16357 Ops.push_back(N->getOperand(0)); // incoming chain
16358 Ops.push_back(N->getOperand(2)); // ptr
16359 Ops.push_back(Inc);
16360
16361 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16362 Ops.push_back(N->getOperand(i));
16363
16364 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16365 MemN->getMemOperand());
16366
16367 // Update the uses.
16368 SmallVector<SDValue, 5> NewResults;
16369 for (unsigned i = 0; i < NumResultVecs; ++i)
16370 NewResults.push_back(SDValue(UpdN.getNode(), i));
16371
16372 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16373 DCI.CombineTo(N, NewResults);
16374 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16375
16376 break;
16377 }
16378
16379 return SDValue();
16380}
16381
16382/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16383/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16384/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16385/// return true.
16386 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16387 SelectionDAG &DAG = DCI.DAG;
16388 EVT VT = N->getValueType(0);
16389 // vldN-dup instructions only support 64-bit vectors for N > 1.
16390 if (!VT.is64BitVector())
16391 return false;
16392
16393 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16394 SDNode *VLD = N->getOperand(0).getNode();
16395 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16396 return false;
16397 unsigned NumVecs = 0;
16398 unsigned NewOpc = 0;
16399 unsigned IntNo = VLD->getConstantOperandVal(1);
16400 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16401 NumVecs = 2;
16402 NewOpc = ARMISD::VLD2DUP;
16403 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16404 NumVecs = 3;
16405 NewOpc = ARMISD::VLD3DUP;
16406 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16407 NumVecs = 4;
16408 NewOpc = ARMISD::VLD4DUP;
16409 } else {
16410 return false;
16411 }
16412
16413 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16414 // numbers match the load.
16415 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16416 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16417 UI != UE; ++UI) {
16418 // Ignore uses of the chain result.
16419 if (UI.getUse().getResNo() == NumVecs)
16420 continue;
16421 SDNode *User = *UI;
16422 if (User->getOpcode() != ARMISD::VDUPLANE ||
16423 VLDLaneNo != User->getConstantOperandVal(1))
16424 return false;
16425 }
16426
16427 // Create the vldN-dup node.
16428 EVT Tys[5];
16429 unsigned n;
16430 for (n = 0; n < NumVecs; ++n)
16431 Tys[n] = VT;
16432 Tys[n] = MVT::Other;
16433 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16434 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16435 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16436 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16437 Ops, VLDMemInt->getMemoryVT(),
16438 VLDMemInt->getMemOperand());
16439
16440 // Update the uses.
16441 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16442 UI != UE; ++UI) {
16443 unsigned ResNo = UI.getUse().getResNo();
16444 // Ignore uses of the chain result.
16445 if (ResNo == NumVecs)
16446 continue;
16447 SDNode *User = *UI;
16448 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16449 }
16450
16451 // Now the vldN-lane intrinsic is dead except for its chain result.
16452 // Update uses of the chain.
16453 std::vector<SDValue> VLDDupResults;
16454 for (unsigned n = 0; n < NumVecs; ++n)
16455 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16456 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16457 DCI.CombineTo(VLD, VLDDupResults);
16458
16459 return true;
16460}
16461
16462/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16463/// ARMISD::VDUPLANE.
16464 static SDValue PerformVDUPLANECombine(SDNode *N,
16465 TargetLowering::DAGCombinerInfo &DCI,
16466 const ARMSubtarget *Subtarget) {
16467 SDValue Op = N->getOperand(0);
16468 EVT VT = N->getValueType(0);
16469
16470 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16471 if (Subtarget->hasMVEIntegerOps()) {
16472 EVT ExtractVT = VT.getVectorElementType();
16473 // We need to ensure we are creating a legal type.
16474 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16475 ExtractVT = MVT::i32;
16476 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16477 N->getOperand(0), N->getOperand(1));
16478 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16479 }
16480
16481 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16482 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16483 if (CombineVLDDUP(N, DCI))
16484 return SDValue(N, 0);
16485
16486 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16487 // redundant. Ignore bit_converts for now; element sizes are checked below.
16488 while (Op.getOpcode() == ISD::BITCAST)
16489 Op = Op.getOperand(0);
16490 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16491 return SDValue();
16492
16493 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16494 unsigned EltSize = Op.getScalarValueSizeInBits();
16495 // The canonical VMOV for a zero vector uses a 32-bit element size.
16496 unsigned Imm = Op.getConstantOperandVal(0);
16497 unsigned EltBits;
16498 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16499 EltSize = 8;
16500 if (EltSize > VT.getScalarSizeInBits())
16501 return SDValue();
16502
16503 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16504}
16505
16506/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16507 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16508 const ARMSubtarget *Subtarget) {
16509 SDValue Op = N->getOperand(0);
16510 SDLoc dl(N);
16511
16512 if (Subtarget->hasMVEIntegerOps()) {
16513 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16514 // need to come from a GPR.
16515 if (Op.getValueType() == MVT::f32)
16516 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16517 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16518 else if (Op.getValueType() == MVT::f16)
16519 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16520 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16521 }
16522
16523 if (!Subtarget->hasNEON())
16524 return SDValue();
16525
16526 // Match VDUP(LOAD) -> VLD1DUP.
16527 // We match this pattern here rather than waiting for isel because the
16528 // transform is only legal for unindexed loads.
16529 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16530 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16531 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16532 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16533 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16534 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16535 SDValue VLDDup =
16536 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16537 LD->getMemoryVT(), LD->getMemOperand());
16538 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16539 return VLDDup;
16540 }
16541
16542 return SDValue();
16543}
16544
16545 static SDValue PerformLOADCombine(SDNode *N,
16546 TargetLowering::DAGCombinerInfo &DCI,
16547 const ARMSubtarget *Subtarget) {
16548 EVT VT = N->getValueType(0);
16549
16550 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16551 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16552 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16553 return CombineBaseUpdate(N, DCI);
16554
16555 return SDValue();
16556}
16557
16558// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16559// pack all of the elements in one place. Next, store to memory in fewer
16560// chunks.
16561 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16562 SelectionDAG &DAG) {
16563 SDValue StVal = St->getValue();
16564 EVT VT = StVal.getValueType();
16565 if (!St->isTruncatingStore() || !VT.isVector())
16566 return SDValue();
16567 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16568 EVT StVT = St->getMemoryVT();
16569 unsigned NumElems = VT.getVectorNumElements();
16570 assert(StVT != VT && "Cannot truncate to the same type");
16571 unsigned FromEltSz = VT.getScalarSizeInBits();
16572 unsigned ToEltSz = StVT.getScalarSizeInBits();
16573
16574 // From, To sizes and ElemCount must be pow of two
16575 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16576 return SDValue();
16577
16578 // We are going to use the original vector elt for storing.
16579 // Accumulated smaller vector elements must be a multiple of the store size.
16580 if (0 != (NumElems * FromEltSz) % ToEltSz)
16581 return SDValue();
16582
16583 unsigned SizeRatio = FromEltSz / ToEltSz;
16584 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16585
16586 // Create a type on which we perform the shuffle.
16587 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16588 NumElems * SizeRatio);
16589 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16590
16591 SDLoc DL(St);
16592 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16593 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16594 for (unsigned i = 0; i < NumElems; ++i)
16595 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16596 : i * SizeRatio;
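// For example, truncating v4i32 to v4i8 gives SizeRatio = 4 and (on
// little-endian) a mask beginning <0, 4, 8, 12>, packing the four kept bytes
// at the bottom of the wide vector.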
16597
16598 // Can't shuffle using an illegal type.
16599 if (!TLI.isTypeLegal(WideVecVT))
16600 return SDValue();
16601
16602 SDValue Shuff = DAG.getVectorShuffle(
16603 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16604 // At this point all of the data is stored at the bottom of the
16605 // register. We now need to save it to mem.
16606
16607 // Find the largest store unit
16608 MVT StoreType = MVT::i8;
16609 for (MVT Tp : MVT::integer_valuetypes()) {
16610 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16611 StoreType = Tp;
16612 }
16613 // Didn't find a legal store type.
16614 if (!TLI.isTypeLegal(StoreType))
16615 return SDValue();
16616
16617 // Bitcast the original vector into a vector of store-size units
16618 EVT StoreVecVT =
16619 EVT::getVectorVT(*DAG.getContext(), StoreType,
16620 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16621 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16622 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16623 SmallVector<SDValue, 8> Chains;
16624 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16625 TLI.getPointerTy(DAG.getDataLayout()));
16626 SDValue BasePtr = St->getBasePtr();
16627
16628 // Perform one or more big stores into memory.
16629 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16630 for (unsigned I = 0; I < E; I++) {
16631 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16632 ShuffWide, DAG.getIntPtrConstant(I, DL));
16633 SDValue Ch =
16634 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16635 St->getAlign(), St->getMemOperand()->getFlags());
16636 BasePtr =
16637 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16638 Chains.push_back(Ch);
16639 }
16640 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16641}
16642
16643// Try taking a single vector store from an fpround (which would otherwise turn
16644// into an expensive buildvector) and splitting it into a series of narrowing
16645// stores.
16646 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16647 SelectionDAG &DAG) {
16648 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16649 return SDValue();
16650 SDValue Trunc = St->getValue();
16651 if (Trunc->getOpcode() != ISD::FP_ROUND)
16652 return SDValue();
16653 EVT FromVT = Trunc->getOperand(0).getValueType();
16654 EVT ToVT = Trunc.getValueType();
16655 if (!ToVT.isVector())
16656 return SDValue();
16658 EVT ToEltVT = ToVT.getVectorElementType();
16659 EVT FromEltVT = FromVT.getVectorElementType();
16660
16661 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16662 return SDValue();
16663
16664 unsigned NumElements = 4;
16665 if (FromVT.getVectorNumElements() % NumElements != 0)
16666 return SDValue();
16667
16668 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16669 // use the VMOVN over splitting the store. We are looking for patterns of:
16670 // !rev: 0 N 1 N+1 2 N+2 ...
16671 // rev: N 0 N+1 1 N+2 2 ...
16672 // The shuffle may either be a single source (in which case N = NumElts/2) or
16673 // two inputs extended with concat to the same size (in which case N =
16674 // NumElts).
16675 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16676 ArrayRef<int> M = SVN->getMask();
16677 unsigned NumElts = ToVT.getVectorNumElements();
16678 if (SVN->getOperand(1).isUndef())
16679 NumElts /= 2;
16680
16681 unsigned Off0 = Rev ? NumElts : 0;
16682 unsigned Off1 = Rev ? 0 : NumElts;
16683
16684 for (unsigned I = 0; I < NumElts; I += 2) {
16685 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16686 return false;
16687 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16688 return false;
16689 }
16690
16691 return true;
16692 };
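// For example, with ToVT = v8f16 and a single-input (undef second operand)
// shuffle, NumElts is 4 and the non-reversed form expects the mask to begin
// 0, 4, 1, 5.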
16693
16694 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16695 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16696 return SDValue();
16697
16698 LLVMContext &C = *DAG.getContext();
16699 SDLoc DL(St);
16700 // Details about the old store
16701 SDValue Ch = St->getChain();
16702 SDValue BasePtr = St->getBasePtr();
16703 Align Alignment = St->getOriginalAlign();
16704 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16705 AAMDNodes AAInfo = St->getAAInfo();
16706
16707 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16708 // and then stored as truncating integer stores.
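// For example, a v8f32 -> v8f16 value is handled as two v4f32 slices: each
// is narrowed with VCVTN, reg-cast to v4i32 and stored with a v4i16
// truncating store at the matching offset.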
16709 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16710 EVT NewToVT = EVT::getVectorVT(
16711 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16712
16713 SmallVector<SDValue, 4> Stores;
16714 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16715 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16716 SDValue NewPtr =
16717 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16718
16719 SDValue Extract =
16720 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16721 DAG.getConstant(i * NumElements, DL, MVT::i32));
16722
16723 SDValue FPTrunc =
16724 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16725 Extract, DAG.getConstant(0, DL, MVT::i32));
16726 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16727
16728 SDValue Store = DAG.getTruncStore(
16729 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16730 NewToVT, Alignment, MMOFlags, AAInfo);
16731 Stores.push_back(Store);
16732 }
16733 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16734}
16735
16736// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16737// into an expensive buildvector) and splitting it into a series of narrowing
16738// stores.
16739 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16740 SelectionDAG &DAG) {
16741 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16742 return SDValue();
16743 SDValue Trunc = St->getValue();
16744 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16745 return SDValue();
16746 EVT FromVT = Trunc->getOperand(0).getValueType();
16747 EVT ToVT = Trunc.getValueType();
16748
16749 LLVMContext &C = *DAG.getContext();
16750 SDLoc DL(St);
16751 // Details about the old store
16752 SDValue Ch = St->getChain();
16753 SDValue BasePtr = St->getBasePtr();
16754 Align Alignment = St->getOriginalAlign();
16755 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16756 AAMDNodes AAInfo = St->getAAInfo();
16757
16758 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16759 FromVT.getVectorNumElements());
16760
16761 SmallVector<SDValue, 4> Stores;
16762 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16763 unsigned NewOffset =
16764 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16765 SDValue NewPtr =
16766 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16767
16768 SDValue Extract = Trunc.getOperand(i);
16769 SDValue Store = DAG.getTruncStore(
16770 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16771 NewToVT, Alignment, MMOFlags, AAInfo);
16772 Stores.push_back(Store);
16773 }
16774 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16775}
16776
16777// Given a floating point store from an extracted vector, with an integer
16778// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16779// help reduce fp register pressure, doesn't require the fp extract and allows
16780// use of more integer post-inc stores not available with vstr.
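// For example, when f16 extract_elt(%v, 2) is being stored and an i32
// VGETLANEu of lane 2 of %v already exists, the value is stored as an i16
// truncating store of that integer lane instead.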
16781 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16782 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16783 return SDValue();
16784 SDValue Extract = St->getValue();
16785 EVT VT = Extract.getValueType();
16786 // For now only uses f16. This may be useful for f32 too, but that will
16787 // be bitcast(extract), not the VGETLANEu we currently check here.
16788 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16789 return SDValue();
16790
16791 SDNode *GetLane =
16792 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16793 {Extract.getOperand(0), Extract.getOperand(1)});
16794 if (!GetLane)
16795 return SDValue();
16796
16797 LLVMContext &C = *DAG.getContext();
16798 SDLoc DL(St);
16799 // Create a new integer store to replace the existing floating point version.
16800 SDValue Ch = St->getChain();
16801 SDValue BasePtr = St->getBasePtr();
16802 Align Alignment = St->getOriginalAlign();
16803 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16804 AAMDNodes AAInfo = St->getAAInfo();
16805 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16806 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16807 St->getPointerInfo(), NewToVT, Alignment,
16808 MMOFlags, AAInfo);
16809
16810 return Store;
16811}
16812
16813/// PerformSTORECombine - Target-specific dag combine xforms for
16814/// ISD::STORE.
16815 static SDValue PerformSTORECombine(SDNode *N,
16816 TargetLowering::DAGCombinerInfo &DCI,
16817 const ARMSubtarget *Subtarget) {
16818 StoreSDNode *St = cast<StoreSDNode>(N);
16819 if (St->isVolatile())
16820 return SDValue();
16821 SDValue StVal = St->getValue();
16822 EVT VT = StVal.getValueType();
16823
16824 if (Subtarget->hasNEON())
16825 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16826 return Store;
16827
16828 if (Subtarget->hasMVEFloatOps())
16829 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16830 return NewToken;
16831
16832 if (Subtarget->hasMVEIntegerOps()) {
16833 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16834 return NewChain;
16835 if (SDValue NewToken =
16836 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16837 return NewToken;
16838 }
16839
16840 if (!ISD::isNormalStore(St))
16841 return SDValue();
16842
16843 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16844 // ARM stores of arguments in the same cache line.
16845 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16846 StVal.getNode()->hasOneUse()) {
16847 SelectionDAG &DAG = DCI.DAG;
16848 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16849 SDLoc DL(St);
16850 SDValue BasePtr = St->getBasePtr();
16851 SDValue NewST1 = DAG.getStore(
16852 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16853 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16854 St->getMemOperand()->getFlags());
16855
16856 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16857 DAG.getConstant(4, DL, MVT::i32));
16858 return DAG.getStore(NewST1.getValue(0), DL,
16859 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16860 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16861 St->getOriginalAlign(),
16862 St->getMemOperand()->getFlags());
16863 }
16864
16865 if (StVal.getValueType() == MVT::i64 &&
16866 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16867
16868 // Bitcast an i64 store extracted from a vector to f64.
16869 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16870 SelectionDAG &DAG = DCI.DAG;
16871 SDLoc dl(StVal);
16872 SDValue IntVec = StVal.getOperand(0);
16873 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16874 IntVec.getValueType().getVectorNumElements());
16875 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16876 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16877 Vec, StVal.getOperand(1));
16878 dl = SDLoc(N);
16879 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16880 // Make the DAGCombiner fold the bitcasts.
16881 DCI.AddToWorklist(Vec.getNode());
16882 DCI.AddToWorklist(ExtElt.getNode());
16883 DCI.AddToWorklist(V.getNode());
16884 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16885 St->getPointerInfo(), St->getAlign(),
16886 St->getMemOperand()->getFlags(), St->getAAInfo());
16887 }
16888
16889 // If this is a legal vector store, try to combine it into a VST1_UPD.
16890 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16891 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16892 return CombineBaseUpdate(N, DCI);
16893
16894 return SDValue();
16895}
16896
16897/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16898/// can replace combinations of VMUL and VCVT (floating-point to integer)
16899/// when the VMUL has a constant operand that is a power of 2.
16900///
16901/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16902/// vmul.f32 d16, d17, d16
16903/// vcvt.s32.f32 d16, d16
16904/// becomes:
16905/// vcvt.s32.f32 d16, d16, #3
16906 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16907 const ARMSubtarget *Subtarget) {
16908 if (!Subtarget->hasNEON())
16909 return SDValue();
16910
16911 SDValue Op = N->getOperand(0);
16912 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16913 Op.getOpcode() != ISD::FMUL)
16914 return SDValue();
16915
16916 SDValue ConstVec = Op->getOperand(1);
16917 if (!isa<BuildVectorSDNode>(ConstVec))
16918 return SDValue();
16919
16920 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16921 uint32_t FloatBits = FloatTy.getSizeInBits();
16922 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16923 uint32_t IntBits = IntTy.getSizeInBits();
16924 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16925 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16926 // These instructions only exist converting from f32 to i32. We can handle
16927 // smaller integers by generating an extra truncate, but larger ones would
16928 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16929 // these instructions only support v2i32/v4i32 types.
16930 return SDValue();
16931 }
16932
16933 BitVector UndefElements;
16934 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16935 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16936 if (C == -1 || C == 0 || C > 32)
16937 return SDValue();
16938
16939 SDLoc dl(N);
16940 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16941 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16942 Intrinsic::arm_neon_vcvtfp2fxu;
16943 SDValue FixConv = DAG.getNode(
16944 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16945 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16946 DAG.getConstant(C, dl, MVT::i32));
16947
16948 if (IntBits < FloatBits)
16949 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16950
16951 return FixConv;
16952}
16953
16954 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16955 const ARMSubtarget *Subtarget) {
16956 if (!Subtarget->hasMVEFloatOps())
16957 return SDValue();
16958
16959 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16960 // The second form can be more easily turned into a predicated vadd, and
16961 // possibly combined into a fma to become a predicated vfma.
16962 SDValue Op0 = N->getOperand(0);
16963 SDValue Op1 = N->getOperand(1);
16964 EVT VT = N->getValueType(0);
16965 SDLoc DL(N);
16966
16967 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
16968 // which these VMOV's represent.
16969 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16970 if (Op.getOpcode() != ISD::BITCAST ||
16971 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16972 return false;
16973 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16974 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16975 return true;
16976 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16977 return true;
16978 return false;
16979 };
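// ImmVal 1664 is the VMOV modified-immediate encoding of a v4i32 splat of
// 0x80000000 (-0.0f per lane) and 2688 encodes a v8i16 splat of 0x8000
// (-0.0 per f16 lane); ImmVal 0 (+0.0) only counts as an identity when the
// nsz flag is present.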
16980
16981 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16982 std::swap(Op0, Op1);
16983
16984 if (Op1.getOpcode() != ISD::VSELECT)
16985 return SDValue();
16986
16987 SDNodeFlags FaddFlags = N->getFlags();
16988 bool NSZ = FaddFlags.hasNoSignedZeros();
16989 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16990 return SDValue();
16991
16992 SDValue FAdd =
16993 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16994 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16995}
16996
16997 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
16998 SDValue LHS = N->getOperand(0);
16999 SDValue RHS = N->getOperand(1);
17000 EVT VT = N->getValueType(0);
17001 SDLoc DL(N);
17002
17003 if (!N->getFlags().hasAllowReassociation())
17004 return SDValue();
17005
17006 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
17007 auto ReassocComplex = [&](SDValue A, SDValue B) {
17008 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
17009 return SDValue();
17010 unsigned Opc = A.getConstantOperandVal(0);
17011 if (Opc != Intrinsic::arm_mve_vcmlaq)
17012 return SDValue();
17013 SDValue VCMLA = DAG.getNode(
17014 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
17015 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17016 A.getOperand(3), A.getOperand(4));
17017 VCMLA->setFlags(A->getFlags());
17018 return VCMLA;
17019 };
17020 if (SDValue R = ReassocComplex(LHS, RHS))
17021 return R;
17022 if (SDValue R = ReassocComplex(RHS, LHS))
17023 return R;
17024
17025 return SDValue();
17026}
17027
17028 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
17029 const ARMSubtarget *Subtarget) {
17030 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17031 return S;
17032 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17033 return S;
17034 return SDValue();
17035}
17036
17037/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17038/// can replace combinations of VCVT (integer to floating-point) and VMUL
17039/// when the VMUL has a constant operand that is a power of 2.
17040///
17041/// Example (assume d17 = <float 0.125, float 0.125>):
17042/// vcvt.f32.s32 d16, d16
17043/// vmul.f32 d16, d16, d17
17044/// becomes:
17045/// vcvt.f32.s32 d16, d16, #3
17046 static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG,
17047 const ARMSubtarget *Subtarget) {
17048 if (!Subtarget->hasNEON())
17049 return SDValue();
17050
17051 SDValue Op = N->getOperand(0);
17052 unsigned OpOpcode = Op.getNode()->getOpcode();
17053 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17054 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17055 return SDValue();
17056
17057 SDValue ConstVec = N->getOperand(1);
17058 if (!isa<BuildVectorSDNode>(ConstVec))
17059 return SDValue();
17060
17061 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17062 uint32_t FloatBits = FloatTy.getSizeInBits();
17063 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17064 uint32_t IntBits = IntTy.getSizeInBits();
17065 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17066 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17067 // These instructions only exist converting from i32 to f32. We can handle
17068 // smaller integers by generating an extra extend, but larger ones would
17069 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17070 // these instructions only support v2i32/v4i32 types.
17071 return SDValue();
17072 }
17073
17074 ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);
17075 APFloat Recip(0.0f);
17076 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17077 return SDValue();
17078
17079 bool IsExact;
17080 APSInt IntVal(33);
17081 if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
17082 APFloat::opOK ||
17083 !IsExact)
17084 return SDValue();
17085
17086 int32_t C = IntVal.exactLogBase2();
17087 if (C == -1 || C == 0 || C > 32)
17088 return SDValue();
17089
17090 SDLoc DL(N);
17091 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17092 SDValue ConvInput = Op.getOperand(0);
17093 if (IntBits < FloatBits)
17094 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
17095 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
17096
17097 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
17098 : Intrinsic::arm_neon_vcvtfxu2fp;
17099 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
17100 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
17101 DAG.getConstant(C, DL, MVT::i32));
17102}
17103
17104 static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17105 const ARMSubtarget *ST) {
17106 if (!ST->hasMVEIntegerOps())
17107 return SDValue();
17108
17109 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17110 EVT ResVT = N->getValueType(0);
17111 SDValue N0 = N->getOperand(0);
17112 SDLoc dl(N);
17113
17114 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17115 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17116 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17117 N0.getValueType() == MVT::v16i8)) {
17118 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17119 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17120 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17121 }
17122
17123 // We are looking for something that will have illegal types if left alone,
17124 // but that we can convert to a single instruction under MVE. For example
17125 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17126 // or
17127 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17128
17129 // The legal cases are:
17130 // VADDV u/s 8/16/32
17131 // VMLAV u/s 8/16/32
17132 // VADDLV u/s 32
17133 // VMLALV u/s 16/32
17134
17135 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17136 // extend it and use v4i32 instead.
17137 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17138 EVT AVT = A.getValueType();
17139 return any_of(ExtTypes, [&](MVT Ty) {
17140 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17141 AVT.bitsLE(Ty);
17142 });
17143 };
17144 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17145 EVT AVT = A.getValueType();
17146 if (!AVT.is128BitVector())
17147 A = DAG.getNode(ExtendCode, dl,
17148 AVT.changeVectorElementType(MVT::getIntegerVT(
17149 128 / AVT.getVectorMinNumElements())),
17150 A);
17151 return A;
17152 };
17153 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17154 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17155 return SDValue();
17156 SDValue A = N0->getOperand(0);
17157 if (ExtTypeMatches(A, ExtTypes))
17158 return ExtendIfNeeded(A, ExtendCode);
17159 return SDValue();
17160 };
17161 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17162 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17163 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17164 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17165 return SDValue();
17166 Mask = N0->getOperand(0);
17167 SDValue Ext = N0->getOperand(1);
17168 if (Ext->getOpcode() != ExtendCode)
17169 return SDValue();
17170 SDValue A = Ext->getOperand(0);
17171 if (ExtTypeMatches(A, ExtTypes))
17172 return ExtendIfNeeded(A, ExtendCode);
17173 return SDValue();
17174 };
17175 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17176 SDValue &A, SDValue &B) {
17177 // For a vmla we are trying to match a larger pattern:
17178 // ExtA = sext/zext A
17179 // ExtB = sext/zext B
17180 // Mul = mul ExtA, ExtB
17181 // vecreduce.add Mul
17182 // There might also be an extra extend between the mul and the addreduce, so
17183 // long as the bitwidth is high enough to make them equivalent (for example
17184 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17185 if (ResVT != RetTy)
17186 return false;
17187 SDValue Mul = N0;
17188 if (Mul->getOpcode() == ExtendCode &&
17189 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17190 ResVT.getScalarSizeInBits())
17191 Mul = Mul->getOperand(0);
17192 if (Mul->getOpcode() != ISD::MUL)
17193 return false;
17194 SDValue ExtA = Mul->getOperand(0);
17195 SDValue ExtB = Mul->getOperand(1);
17196 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17197 return false;
17198 A = ExtA->getOperand(0);
17199 B = ExtB->getOperand(0);
17200 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17201 A = ExtendIfNeeded(A, ExtendCode);
17202 B = ExtendIfNeeded(B, ExtendCode);
17203 return true;
17204 }
17205 return false;
17206 };
17207 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17208 SDValue &A, SDValue &B, SDValue &Mask) {
17209 // Same as the pattern above with a select for the zero predicated lanes
17210 // ExtA = sext/zext A
17211 // ExtB = sext/zext B
17212 // Mul = mul ExtA, ExtB
17213 // N0 = select Mask, Mul, 0
17214 // vecreduce.add N0
17215 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17216 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17217 return false;
17218 Mask = N0->getOperand(0);
17219 SDValue Mul = N0->getOperand(1);
17220 if (Mul->getOpcode() == ExtendCode &&
17221 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17222 ResVT.getScalarSizeInBits())
17223 Mul = Mul->getOperand(0);
17224 if (Mul->getOpcode() != ISD::MUL)
17225 return false;
17226 SDValue ExtA = Mul->getOperand(0);
17227 SDValue ExtB = Mul->getOperand(1);
17228 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17229 return false;
17230 A = ExtA->getOperand(0);
17231 B = ExtB->getOperand(0);
17232 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17233 A = ExtendIfNeeded(A, ExtendCode);
17234 B = ExtendIfNeeded(B, ExtendCode);
17235 return true;
17236 }
17237 return false;
17238 };
17239 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17240 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17241 // reductions. The operands are extended with MVEEXT, but as they are
17242 // reductions the lane orders do not matter. MVEEXT may be combined with
17243 // loads to produce two extending loads, or else they will be expanded to
17244 // VREV/VMOVL.
17245 EVT VT = Ops[0].getValueType();
17246 if (VT == MVT::v16i8) {
17247 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17248 "Unexpected illegal long reduction opcode");
17249 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17250
17251 SDValue Ext0 =
17252 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17253 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17254 SDValue Ext1 =
17255 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17256 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17257
17258 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17259 Ext0, Ext1);
17260 SDValue MLA1 =
17261 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17262 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17263 Ext0.getValue(1), Ext1.getValue(1));
17264 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17265 }
17266 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17267 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17268 SDValue(Node.getNode(), 1));
17269 };
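// Each of these 64-bit reductions yields its result as two i32 values (low
// and high halves) that BUILD_PAIR recombines into the i64 the caller
// expects.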
17270
17271 SDValue A, B;
17272 SDValue Mask;
17273 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17274 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17275 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17276 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17277 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17278 A, B))
17279 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17280 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17281 A, B))
17282 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17283 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17284 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17285 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17286 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17287 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17288 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17289
17290 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17291 Mask))
17292 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17293 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17294 Mask))
17295 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17296 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17297 Mask))
17298 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17299 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17300 Mask))
17301 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17302 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17303 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17304 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17305 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17306 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17307 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17308
17309 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17310 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17311 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17312 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17313 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17314 return Create64bitNode(ARMISD::VADDLVs, {A});
17315 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17316 return Create64bitNode(ARMISD::VADDLVu, {A});
17317 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17318 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17319 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17320 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17321 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17322 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17323
17324 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17325 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17326 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17327 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17328 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17329 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17330 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17331 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17332 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17333 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17334 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17335 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17336 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17337 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17338
17339 // Some complications. We can get a case where the two inputs of the mul are
17340 // the same, in which case the output sext will have been helpfully converted
17341 // to a zext. Turn it back.
17342 SDValue Op = N0;
17343 if (Op->getOpcode() == ISD::VSELECT)
17344 Op = Op->getOperand(1);
17345 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17346 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17347 SDValue Mul = Op->getOperand(0);
17348 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17349 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17350 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17351 if (Op != N0)
17352 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17353 N0->getOperand(0), Ext, N0->getOperand(2));
17354 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17355 }
17356 }
17357
17358 return SDValue();
17359}
17360
17361// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17362// the lanes are used. Due to the reduction being commutative the shuffle can be
17363// removed.
17364 static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17365 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17366 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17367 if (!Shuf || !Shuf->getOperand(1).isUndef())
17368 return SDValue();
17369
17370 // Check all elements are used once in the mask.
17371 ArrayRef<int> Mask = Shuf->getMask();
17372 APInt SetElts(Mask.size(), 0);
17373 for (int E : Mask) {
17374 if (E < 0 || E >= (int)Mask.size())
17375 return SDValue();
17376 SetElts.setBit(E);
17377 }
17378 if (!SetElts.isAllOnes())
17379 return SDValue();
17380
17381 if (N->getNumOperands() != VecOp + 1) {
17382 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17383 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17384 return SDValue();
17385 }
17386
17387 SmallVector<SDValue> Ops;
17388 for (SDValue Op : N->ops()) {
17389 if (Op.getValueType().isVector())
17390 Ops.push_back(Op.getOperand(0));
17391 else
17392 Ops.push_back(Op);
17393 }
17394 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17395}
17396
17397 static SDValue PerformVMOVNCombine(SDNode *N,
17398 TargetLowering::DAGCombinerInfo &DCI) {
17399 SDValue Op0 = N->getOperand(0);
17400 SDValue Op1 = N->getOperand(1);
17401 unsigned IsTop = N->getConstantOperandVal(2);
17402
17403 // VMOVNT a undef -> a
17404 // VMOVNB a undef -> a
17405 // VMOVNB undef a -> a
17406 if (Op1->isUndef())
17407 return Op0;
17408 if (Op0->isUndef() && !IsTop)
17409 return Op1;
17410
17411 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17412 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17413 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17414 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17415 Op1->getConstantOperandVal(2) == 0)
17416 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17417 Op0, Op1->getOperand(1), N->getOperand(2));
17418
17419 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17420 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17421 // into the top or bottom lanes.
17422 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17423 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17424 APInt Op0DemandedElts =
17425 IsTop ? Op1DemandedElts
17426 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
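// getSplat with getLowBitsSet(2, 1) produces the 0b...0101 pattern, i.e.
// only the even (bottom) lanes; getHighBitsSet(2, 1) selects the odd (top)
// lanes instead.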
17427
17428 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17429 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17430 return SDValue(N, 0);
17431 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17432 return SDValue(N, 0);
17433
17434 return SDValue();
17435}
17436
17437 static SDValue PerformVQMOVNCombine(SDNode *N,
17438 TargetLowering::DAGCombinerInfo &DCI) {
17439 SDValue Op0 = N->getOperand(0);
17440 unsigned IsTop = N->getConstantOperandVal(2);
17441
17442 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17443 APInt Op0DemandedElts =
17444 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17445 : APInt::getHighBitsSet(2, 1));
17446
17447 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17448 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17449 return SDValue(N, 0);
17450 return SDValue();
17451}
17452
17453 static SDValue PerformVQDMULHCombine(SDNode *N,
17454 TargetLowering::DAGCombinerInfo &DCI) {
17455 EVT VT = N->getValueType(0);
17456 SDValue LHS = N->getOperand(0);
17457 SDValue RHS = N->getOperand(1);
17458
17459 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17460 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17461 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17462 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17463 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17464 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17465 SDLoc DL(N);
17466 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17467 LHS.getOperand(0), RHS.getOperand(0));
17468 SDValue UndefV = LHS.getOperand(1);
17469 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17470 }
17471 return SDValue();
17472}
17473
17474 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17475 SDLoc DL(N);
17476 SDValue Op0 = N->getOperand(0);
17477 SDValue Op1 = N->getOperand(1);
17478
17479 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17480 // uses of the intrinsics.
17481 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17482 int ShiftAmt = C->getSExtValue();
17483 if (ShiftAmt == 0) {
17484 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17485 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17486 return SDValue();
17487 }
17488
17489 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17490 unsigned NewOpcode =
17491 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17492 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17493 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17494 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17495 return NewShift;
17496 }
17497 }
17498
17499 return SDValue();
17500}
17501
17502/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17503 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17504 DAGCombinerInfo &DCI) const {
17505 SelectionDAG &DAG = DCI.DAG;
17506 unsigned IntNo = N->getConstantOperandVal(0);
17507 switch (IntNo) {
17508 default:
17509 // Don't do anything for most intrinsics.
17510 break;
17511
17512 // Vector shifts: check for immediate versions and lower them.
17513 // Note: This is done during DAG combining instead of DAG legalizing because
17514 // the build_vectors for 64-bit vector element shift counts are generally
17515 // not legal, and it is hard to see their values after they get legalized to
17516 // loads from a constant pool.
17517 case Intrinsic::arm_neon_vshifts:
17518 case Intrinsic::arm_neon_vshiftu:
17519 case Intrinsic::arm_neon_vrshifts:
17520 case Intrinsic::arm_neon_vrshiftu:
17521 case Intrinsic::arm_neon_vrshiftn:
17522 case Intrinsic::arm_neon_vqshifts:
17523 case Intrinsic::arm_neon_vqshiftu:
17524 case Intrinsic::arm_neon_vqshiftsu:
17525 case Intrinsic::arm_neon_vqshiftns:
17526 case Intrinsic::arm_neon_vqshiftnu:
17527 case Intrinsic::arm_neon_vqshiftnsu:
17528 case Intrinsic::arm_neon_vqrshiftns:
17529 case Intrinsic::arm_neon_vqrshiftnu:
17530 case Intrinsic::arm_neon_vqrshiftnsu: {
17531 EVT VT = N->getOperand(1).getValueType();
17532 int64_t Cnt;
17533 unsigned VShiftOpc = 0;
17534
17535 switch (IntNo) {
17536 case Intrinsic::arm_neon_vshifts:
17537 case Intrinsic::arm_neon_vshiftu:
17538 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17539 VShiftOpc = ARMISD::VSHLIMM;
17540 break;
17541 }
17542 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17543 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17544 : ARMISD::VSHRuIMM);
17545 break;
17546 }
17547 return SDValue();
17548
17549 case Intrinsic::arm_neon_vrshifts:
17550 case Intrinsic::arm_neon_vrshiftu:
17551 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17552 break;
17553 return SDValue();
17554
17555 case Intrinsic::arm_neon_vqshifts:
17556 case Intrinsic::arm_neon_vqshiftu:
17557 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17558 break;
17559 return SDValue();
17560
17561 case Intrinsic::arm_neon_vqshiftsu:
17562 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17563 break;
17564 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17565
17566 case Intrinsic::arm_neon_vrshiftn:
17567 case Intrinsic::arm_neon_vqshiftns:
17568 case Intrinsic::arm_neon_vqshiftnu:
17569 case Intrinsic::arm_neon_vqshiftnsu:
17570 case Intrinsic::arm_neon_vqrshiftns:
17571 case Intrinsic::arm_neon_vqrshiftnu:
17572 case Intrinsic::arm_neon_vqrshiftnsu:
17573 // Narrowing shifts require an immediate right shift.
17574 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17575 break;
17576 llvm_unreachable("invalid shift count for narrowing vector shift "
17577 "intrinsic");
17578
17579 default:
17580 llvm_unreachable("unhandled vector shift");
17581 }
17582
17583 switch (IntNo) {
17584 case Intrinsic::arm_neon_vshifts:
17585 case Intrinsic::arm_neon_vshiftu:
17586 // Opcode already set above.
17587 break;
17588 case Intrinsic::arm_neon_vrshifts:
17589 VShiftOpc = ARMISD::VRSHRsIMM;
17590 break;
17591 case Intrinsic::arm_neon_vrshiftu:
17592 VShiftOpc = ARMISD::VRSHRuIMM;
17593 break;
17594 case Intrinsic::arm_neon_vrshiftn:
17595 VShiftOpc = ARMISD::VRSHRNIMM;
17596 break;
17597 case Intrinsic::arm_neon_vqshifts:
17598 VShiftOpc = ARMISD::VQSHLsIMM;
17599 break;
17600 case Intrinsic::arm_neon_vqshiftu:
17601 VShiftOpc = ARMISD::VQSHLuIMM;
17602 break;
17603 case Intrinsic::arm_neon_vqshiftsu:
17604 VShiftOpc = ARMISD::VQSHLsuIMM;
17605 break;
17606 case Intrinsic::arm_neon_vqshiftns:
17607 VShiftOpc = ARMISD::VQSHRNsIMM;
17608 break;
17609 case Intrinsic::arm_neon_vqshiftnu:
17610 VShiftOpc = ARMISD::VQSHRNuIMM;
17611 break;
17612 case Intrinsic::arm_neon_vqshiftnsu:
17613 VShiftOpc = ARMISD::VQSHRNsuIMM;
17614 break;
17615 case Intrinsic::arm_neon_vqrshiftns:
17616 VShiftOpc = ARMISD::VQRSHRNsIMM;
17617 break;
17618 case Intrinsic::arm_neon_vqrshiftnu:
17619 VShiftOpc = ARMISD::VQRSHRNuIMM;
17620 break;
17621 case Intrinsic::arm_neon_vqrshiftnsu:
17622 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17623 break;
17624 }
17625
17626 SDLoc dl(N);
17627 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17628 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17629 }
17630
17631 case Intrinsic::arm_neon_vshiftins: {
17632 EVT VT = N->getOperand(1).getValueType();
17633 int64_t Cnt;
17634 unsigned VShiftOpc = 0;
17635
17636 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17637 VShiftOpc = ARMISD::VSLIIMM;
17638 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17639 VShiftOpc = ARMISD::VSRIIMM;
17640 else {
17641 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17642 }
17643
17644 SDLoc dl(N);
17645 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17646 N->getOperand(1), N->getOperand(2),
17647 DAG.getConstant(Cnt, dl, MVT::i32));
17648 }
17649
17650 case Intrinsic::arm_neon_vqrshifts:
17651 case Intrinsic::arm_neon_vqrshiftu:
17652 // No immediate versions of these to check for.
17653 break;
17654
17655 case Intrinsic::arm_mve_vqdmlah:
17656 case Intrinsic::arm_mve_vqdmlash:
17657 case Intrinsic::arm_mve_vqrdmlah:
17658 case Intrinsic::arm_mve_vqrdmlash:
17659 case Intrinsic::arm_mve_vmla_n_predicated:
17660 case Intrinsic::arm_mve_vmlas_n_predicated:
17661 case Intrinsic::arm_mve_vqdmlah_predicated:
17662 case Intrinsic::arm_mve_vqdmlash_predicated:
17663 case Intrinsic::arm_mve_vqrdmlah_predicated:
17664 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17665 // These intrinsics all take an i32 scalar operand which is narrowed to the
17666 // size of a single lane of the vector type they return. So we don't need
17667 // any bits of that operand above that point, which allows us to eliminate
17668 // uxth/sxth.
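// For example, a v8i16 result demands only the low 16 bits of the i32
// scalar operand, so a preceding sxth/uxth of that operand becomes dead.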
17669 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17670 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17671 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17672 return SDValue();
17673 break;
17674 }
17675
17676 case Intrinsic::arm_mve_minv:
17677 case Intrinsic::arm_mve_maxv:
17678 case Intrinsic::arm_mve_minav:
17679 case Intrinsic::arm_mve_maxav:
17680 case Intrinsic::arm_mve_minv_predicated:
17681 case Intrinsic::arm_mve_maxv_predicated:
17682 case Intrinsic::arm_mve_minav_predicated:
17683 case Intrinsic::arm_mve_maxav_predicated: {
17684 // These intrinsics all take an i32 scalar operand which is narrowed to the
17685 // size of a single lane of the vector type they take as the other input.
17686 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17687 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17688 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17689 return SDValue();
17690 break;
17691 }
17692
17693 case Intrinsic::arm_mve_addv: {
17694 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17695 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17696 bool Unsigned = N->getConstantOperandVal(2);
17697 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17698 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17699 }
17700
17701 case Intrinsic::arm_mve_addlv:
17702 case Intrinsic::arm_mve_addlv_predicated: {
17703 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17704 // which recombines the two outputs into an i64
17705 bool Unsigned = N->getConstantOperandVal(2);
17706 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17707 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17708 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17709
17710 SmallVector<SDValue, 4> Ops;
17711 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17712 if (i != 2) // skip the unsigned flag
17713 Ops.push_back(N->getOperand(i));
17714
17715 SDLoc dl(N);
17716 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17717 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17718 val.getValue(1));
17719 }
17720 }
17721
17722 return SDValue();
17723}
17724
17725/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17726/// lowers them. As with the vector shift intrinsics, this is done during DAG
17727/// combining instead of DAG legalizing because the build_vectors for 64-bit
17728/// vector element shift counts are generally not legal, and it is hard to see
17729/// their values after they get legalized to loads from a constant pool.
17732 const ARMSubtarget *ST) {
17733 SelectionDAG &DAG = DCI.DAG;
17734 EVT VT = N->getValueType(0);
17735
17736 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17737 N->getOperand(0)->getOpcode() == ISD::AND &&
17738 N->getOperand(0)->hasOneUse()) {
17739 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17740 return SDValue();
17741 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17742 // usually show up because instcombine prefers to canonicalize it to
17743 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17744 // out of GEP lowering in some cases.
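// For example, (shl (and x, 0x3FF), 2) has MaskedBits = 22 and becomes
// (srl (shl x, 22), 20), avoiding materializing the mask constant.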
17745 SDValue N0 = N->getOperand(0);
17746 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17747 if (!ShiftAmtNode)
17748 return SDValue();
17749 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17750 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17751 if (!AndMaskNode)
17752 return SDValue();
17753 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17754 // Don't transform uxtb/uxth.
17755 if (AndMask == 255 || AndMask == 65535)
17756 return SDValue();
17757 if (isMask_32(AndMask)) {
17758 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17759 if (MaskedBits > ShiftAmt) {
17760 SDLoc DL(N);
17761 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17762 DAG.getConstant(MaskedBits, DL, MVT::i32));
17763 return DAG.getNode(
17764 ISD::SRL, DL, MVT::i32, SHL,
17765 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17766 }
17767 }
17768 }
17769
17770 // Nothing to be done for scalar shifts.
17771 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17772 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17773 return SDValue();
17774 if (ST->hasMVEIntegerOps())
17775 return SDValue();
17776
17777 int64_t Cnt;
17778
17779 switch (N->getOpcode()) {
17780 default: llvm_unreachable("unexpected shift opcode");
17781
17782 case ISD::SHL:
17783 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17784 SDLoc dl(N);
17785 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17786 DAG.getConstant(Cnt, dl, MVT::i32));
17787 }
17788 break;
17789
17790 case ISD::SRA:
17791 case ISD::SRL:
17792 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17793 unsigned VShiftOpc =
17794 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17795 SDLoc dl(N);
17796 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17797 DAG.getConstant(Cnt, dl, MVT::i32));
17798 }
17799 }
17800 return SDValue();
17801}
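// Minimal sketch (for exposition only, not used by the lowering) of the
// scalar identity behind the Thumb1 (shl (and x, Mask), ShiftAmt) rewrite
// above. With Mask == (1 << K) - 1 and MaskedBits == 32 - K, two shifts
// reproduce the masked-then-shifted value without materializing the mask:
[[maybe_unused]] static uint32_t maskedShiftViaTwoShifts(uint32_t X, unsigned MaskedBits,
                                                         unsigned ShiftAmt) {
  // Assumes MaskedBits > ShiftAmt, as PerformShiftCombine checks.
  return (X << MaskedBits) >> (MaskedBits - ShiftAmt);
}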
17802
17803 // Look for a sign, zero or fp extend of a larger-than-legal load. This can be
17804// split into multiple extending loads, which are simpler to deal with than an
17805// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17806// to convert the type to an f32.
17807 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17808 SDValue N0 = N->getOperand(0);
17809 if (N0.getOpcode() != ISD::LOAD)
17810 return SDValue();
17811 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17812 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17813 LD->getExtensionType() != ISD::NON_EXTLOAD)
17814 return SDValue();
17815 EVT FromVT = LD->getValueType(0);
17816 EVT ToVT = N->getValueType(0);
17817 if (!ToVT.isVector())
17818 return SDValue();
17819 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
17820 EVT ToEltVT = ToVT.getVectorElementType();
17821 EVT FromEltVT = FromVT.getVectorElementType();
17822
17823 unsigned NumElements = 0;
17824 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17825 NumElements = 4;
17826 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17827 NumElements = 4;
17828 if (NumElements == 0 ||
17829 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17830 FromVT.getVectorNumElements() % NumElements != 0 ||
17831 !isPowerOf2_32(NumElements))
17832 return SDValue();
17833
17834 LLVMContext &C = *DAG.getContext();
17835 SDLoc DL(LD);
17836 // Details about the old load
17837 SDValue Ch = LD->getChain();
17838 SDValue BasePtr = LD->getBasePtr();
17839 Align Alignment = LD->getOriginalAlign();
17840 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17841 AAMDNodes AAInfo = LD->getAAInfo();
17842
17843 ISD::LoadExtType NewExtType =
17844 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17845 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17846 EVT NewFromVT = EVT::getVectorVT(
17847 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17848 EVT NewToVT = EVT::getVectorVT(
17849 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17850
17851 SmallVector<SDValue, 4> Loads;
17852 SmallVector<SDValue, 4> Chains;
17853 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17854 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17855 SDValue NewPtr =
17856 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17857
17858 SDValue NewLoad =
17859 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17860 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17861 Alignment, MMOFlags, AAInfo);
17862 Loads.push_back(NewLoad);
17863 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17864 }
17865
17866 // Float truncs need to be extended with VCVTB's into their floating point types.
17867 if (FromEltVT == MVT::f16) {
17868 SmallVector<SDValue, 4> Extends;
17869
17870 for (unsigned i = 0; i < Loads.size(); i++) {
17871 SDValue LoadBC =
17872 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17873 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17874 DAG.getConstant(0, DL, MVT::i32));
17875 Extends.push_back(FPExt);
17876 }
17877
17878 Loads = Extends;
17879 }
17880
17881 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17882 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17883 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17884}
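// Worked example (illustrative only): for a (v8i32 sext (v8i8 load p)) under
// MVE, NumElements is 4, so the helper above emits two v4i8->v4i32 extending
// loads at byte offsets 0 and 4 and concatenates them:
//   (v8i32 sext (v8i8 load p))
//     -> (concat_vectors (v4i32 sextload p), (v4i32 sextload p+4))
// Each part's byte offset is i * NewFromVT.getSizeInBits() / 8, as computed
// in the loop.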
17885
17886/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17887/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17888 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17889 const ARMSubtarget *ST) {
17890 SDValue N0 = N->getOperand(0);
17891
17892 // Check for sign- and zero-extensions of vector extract operations of 8- and
17893 // 16-bit vector elements. NEON and MVE support these directly. They are
17894 // handled during DAG combining because type legalization will promote them
17895 // to 32-bit types and it is messy to recognize the operations after that.
17896 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17897 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17898 SDValue Vec = N0.getOperand(0);
17899 SDValue Lane = N0.getOperand(1);
17900 EVT VT = N->getValueType(0);
17901 EVT EltVT = N0.getValueType();
17902 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17903
17904 if (VT == MVT::i32 &&
17905 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17906 TLI.isTypeLegal(Vec.getValueType()) &&
17907 isa<ConstantSDNode>(Lane)) {
17908
17909 unsigned Opc = 0;
17910 switch (N->getOpcode()) {
17911 default: llvm_unreachable("unexpected opcode");
17912 case ISD::SIGN_EXTEND:
17913 Opc = ARMISD::VGETLANEs;
17914 break;
17915 case ISD::ZERO_EXTEND:
17916 case ISD::ANY_EXTEND:
17917 Opc = ARMISD::VGETLANEu;
17918 break;
17919 }
17920 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17921 }
17922 }
17923
17924 if (ST->hasMVEIntegerOps())
17925 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17926 return NewLoad;
17927
17928 return SDValue();
17929}
17930
17931 static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17932 const ARMSubtarget *ST) {
17933 if (ST->hasMVEFloatOps())
17934 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17935 return NewLoad;
17936
17937 return SDValue();
17938}
17939
17940// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17941// constant bounds.
17942 static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17943 const ARMSubtarget *Subtarget) {
17944 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17945 !Subtarget->isThumb2())
17946 return SDValue();
17947
17948 EVT VT = Op.getValueType();
17949 SDValue Op0 = Op.getOperand(0);
17950
17951 if (VT != MVT::i32 ||
17952 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17953 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17954 !isa<ConstantSDNode>(Op0.getOperand(1)))
17955 return SDValue();
17956
17957 SDValue Min = Op;
17958 SDValue Max = Op0;
17959 SDValue Input = Op0.getOperand(0);
17960 if (Min.getOpcode() == ISD::SMAX)
17961 std::swap(Min, Max);
17962
17963 APInt MinC = Min.getConstantOperandAPInt(1);
17964 APInt MaxC = Max.getConstantOperandAPInt(1);
17965
17966 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17967 !(MinC + 1).isPowerOf2())
17968 return SDValue();
17969
17970 SDLoc DL(Op);
17971 if (MinC == ~MaxC)
17972 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17973 DAG.getConstant(MinC.countr_one(), DL, VT));
17974 if (MaxC == 0)
17975 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17976 DAG.getConstant(MinC.countr_one(), DL, VT));
17977
17978 return SDValue();
17979}
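// Worked example (illustrative only): with MinC = 127 and MaxC = -128 the
// checks above hold (MinC + 1 is a power of two and MinC == ~MaxC), so
// smin(smax(x, -128), 127) becomes an ARMISD::SSAT of x, i.e. a signed
// saturate to 8 bits; smin(smax(x, 0), 255) takes the MaxC == 0 path and
// becomes ARMISD::USAT. A scalar reference for the signed 8-bit case
// (added for exposition, not used anywhere):
[[maybe_unused]] static int32_t referenceSignedSat8(int32_t X) {
  return std::min<int32_t>(std::max<int32_t>(X, -128), 127);
}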
17980
17981/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17982/// saturates.
17983 static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17984 const ARMSubtarget *ST) {
17985 EVT VT = N->getValueType(0);
17986 SDValue N0 = N->getOperand(0);
17987
17988 if (VT == MVT::i32)
17989 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17990
17991 if (!ST->hasMVEIntegerOps())
17992 return SDValue();
17993
17994 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17995 return V;
17996
17997 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17998 return SDValue();
17999
18000 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
18001 // Check one is a smin and the other is a smax
18002 if (Min->getOpcode() != ISD::SMIN)
18003 std::swap(Min, Max);
18004 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
18005 return false;
18006
18007 APInt SaturateC;
18008 if (VT == MVT::v4i32)
18009 SaturateC = APInt(32, (1 << 15) - 1, true);
18010 else //if (VT == MVT::v8i16)
18011 SaturateC = APInt(16, (1 << 7) - 1, true);
18012
18013 APInt MinC, MaxC;
18014 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18015 MinC != SaturateC)
18016 return false;
18017 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18018 MaxC != ~SaturateC)
18019 return false;
18020 return true;
18021 };
18022
18023 if (IsSignedSaturate(N, N0.getNode())) {
18024 SDLoc DL(N);
18025 MVT ExtVT, HalfVT;
18026 if (VT == MVT::v4i32) {
18027 HalfVT = MVT::v8i16;
18028 ExtVT = MVT::v4i16;
18029 } else { // if (VT == MVT::v8i16)
18030 HalfVT = MVT::v16i8;
18031 ExtVT = MVT::v8i8;
18032 }
18033
18034 // Create a VQMOVNB with undef top lanes, then sign extend it into the top
18035 // half. That extend will hopefully be removed if only the bottom bits are
18036 // demanded (through a truncating store, for example).
18037 SDValue VQMOVN =
18038 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18039 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18040 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18041 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18042 DAG.getValueType(ExtVT));
18043 }
18044
18045 auto IsUnsignedSaturate = [&](SDNode *Min) {
18046 // For unsigned, we just need to check for <= 0xffff
18047 if (Min->getOpcode() != ISD::UMIN)
18048 return false;
18049
18050 APInt SaturateC;
18051 if (VT == MVT::v4i32)
18052 SaturateC = APInt(32, (1 << 16) - 1, true);
18053 else //if (VT == MVT::v8i16)
18054 SaturateC = APInt(16, (1 << 8) - 1, true);
18055
18056 APInt MinC;
18057 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18058 MinC != SaturateC)
18059 return false;
18060 return true;
18061 };
18062
18063 if (IsUnsignedSaturate(N)) {
18064 SDLoc DL(N);
18065 MVT HalfVT;
18066 unsigned ExtConst;
18067 if (VT == MVT::v4i32) {
18068 HalfVT = MVT::v8i16;
18069 ExtConst = 0x0000FFFF;
18070 } else { //if (VT == MVT::v8i16)
18071 HalfVT = MVT::v16i8;
18072 ExtConst = 0x00FF;
18073 }
18074
18075 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18076 // an AND. That extend will hopefully be removed if only the bottom bits are
18077 // demanded (through a truncating store, for example).
18078 SDValue VQMOVN =
18079 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18080 DAG.getConstant(0, DL, MVT::i32));
18081 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18082 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18083 DAG.getConstant(ExtConst, DL, VT));
18084 }
18085
18086 return SDValue();
18087}
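// Worked example (illustrative only): for VT == v4i32, SaturateC is 0x7fff,
// so a smin/smax pair clamping each lane to [-32768, 32767] is rewritten as a
// bottom-lane ARMISD::VQMOVNs into a v8i16 register plus a sign_extend_inreg
// back to v4i32; the extend is expected to fold away when only the low halves
// are later demanded, e.g. by a truncating store.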
18088
18089 static const APInt *isPowerOf2Constant(SDValue V) {
18090 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18091 if (!C)
18092 return nullptr;
18093 const APInt *CV = &C->getAPIntValue();
18094 return CV->isPowerOf2() ? CV : nullptr;
18095}
18096
18097 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18098 // If we have a CMOV, OR and AND combination such as:
18099 // if (x & CN)
18100 // y |= CM;
18101 //
18102 // And:
18103 // * CN is a single bit;
18104 // * All bits covered by CM are known zero in y
18105 //
18106 // Then we can convert this into a sequence of BFI instructions. This will
18107 // always be a win if CM is a single bit, will always be no worse than the
18108 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18109 // three bits (due to the extra IT instruction).
18110
18111 SDValue Op0 = CMOV->getOperand(0);
18112 SDValue Op1 = CMOV->getOperand(1);
18113 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18114 SDValue CmpZ = CMOV->getOperand(4);
18115
18116 // The compare must be against zero.
18117 if (!isNullConstant(CmpZ->getOperand(1)))
18118 return SDValue();
18119
18120 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18121 SDValue And = CmpZ->getOperand(0);
18122 if (And->getOpcode() != ISD::AND)
18123 return SDValue();
18124 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18125 if (!AndC)
18126 return SDValue();
18127 SDValue X = And->getOperand(0);
18128
18129 if (CC == ARMCC::EQ) {
18130 // We're performing an "equal to zero" compare. Swap the operands so we
18131 // canonicalize on a "not equal to zero" compare.
18132 std::swap(Op0, Op1);
18133 } else {
18134 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18135 }
18136
18137 if (Op1->getOpcode() != ISD::OR)
18138 return SDValue();
18139
18140 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18141 if (!OrC)
18142 return SDValue();
18143 SDValue Y = Op1->getOperand(0);
18144
18145 if (Op0 != Y)
18146 return SDValue();
18147
18148 // Now, is it profitable to continue?
18149 APInt OrCI = OrC->getAPIntValue();
18150 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18151 if (OrCI.popcount() > Heuristic)
18152 return SDValue();
18153
18154 // Lastly, can we determine that the bits defined by OrCI
18155 // are zero in Y?
18156 KnownBits Known = DAG.computeKnownBits(Y);
18157 if ((OrCI & Known.Zero) != OrCI)
18158 return SDValue();
18159
18160 // OK, we can do the combine.
18161 SDValue V = Y;
18162 SDLoc dl(X);
18163 EVT VT = X.getValueType();
18164 unsigned BitInX = AndC->logBase2();
18165
18166 if (BitInX != 0) {
18167 // We must shift X first.
18168 X = DAG.getNode(ISD::SRL, dl, VT, X,
18169 DAG.getConstant(BitInX, dl, VT));
18170 }
18171
18172 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18173 BitInY < NumActiveBits; ++BitInY) {
18174 if (OrCI[BitInY] == 0)
18175 continue;
18176 APInt Mask(VT.getSizeInBits(), 0);
18177 Mask.setBit(BitInY);
18178 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18179 // Confusingly, the operand is an *inverted* mask.
18180 DAG.getConstant(~Mask, dl, VT));
18181 }
18182
18183 return V;
18184}
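// Worked example (illustrative only): with CN = 0x4 and CM = 0x18, and bits 3
// and 4 of y known to be zero, the combine above turns
//   if (x & 0x4) y |= 0x18;
// into t = x >> 2 followed by one BFI per set bit of CM, copying bit 0 of t
// into bits 3 and 4 of y, with no compare or conditional branch.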
18185
18186// Given N, the value controlling the conditional branch, search for the loop
18187// intrinsic, returning it, along with how the value is used. We need to handle
18188// patterns such as the following:
18189// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18190// (brcond (setcc (loop.decrement), 0, eq), exit)
18191// (brcond (setcc (loop.decrement), 0, ne), header)
18192 static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18193 bool &Negate) {
18194 switch (N->getOpcode()) {
18195 default:
18196 break;
18197 case ISD::XOR: {
18198 if (!isa<ConstantSDNode>(N.getOperand(1)))
18199 return SDValue();
18200 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18201 return SDValue();
18202 Negate = !Negate;
18203 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18204 }
18205 case ISD::SETCC: {
18206 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18207 if (!Const)
18208 return SDValue();
18209 if (Const->isZero())
18210 Imm = 0;
18211 else if (Const->isOne())
18212 Imm = 1;
18213 else
18214 return SDValue();
18215 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18216 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18217 }
18218 case ISD::INTRINSIC_W_CHAIN: {
18219 unsigned IntOp = N.getConstantOperandVal(1);
18220 if (IntOp != Intrinsic::test_start_loop_iterations &&
18221 IntOp != Intrinsic::loop_decrement_reg)
18222 return SDValue();
18223 return N;
18224 }
18225 }
18226 return SDValue();
18227}
18228
18229 static SDValue PerformHWLoopCombine(SDNode *N,
18230 TargetLowering::DAGCombinerInfo &DCI,
18231 const ARMSubtarget *ST) {
18232
18233 // The hwloop intrinsics that we're interested in are used for control flow,
18234 // either for entering or exiting the loop:
18235 // - test.start.loop.iterations will test whether its operand is zero. If it
18236 // is zero, the following branch should not enter the loop.
18237 // - loop.decrement.reg also tests whether its operand is zero. If it is
18238 // zero, the following branch should not branch back to the beginning of
18239 // the loop.
18240 // So here, we need to check how the brcond is using the result of each
18241 // of the intrinsics to ensure that we're branching to the right place at the
18242 // right time.
18243
18244 ISD::CondCode CC;
18245 SDValue Cond;
18246 int Imm = 1;
18247 bool Negate = false;
18248 SDValue Chain = N->getOperand(0);
18249 SDValue Dest;
18250
18251 if (N->getOpcode() == ISD::BRCOND) {
18252 CC = ISD::SETEQ;
18253 Cond = N->getOperand(1);
18254 Dest = N->getOperand(2);
18255 } else {
18256 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18257 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18258 Cond = N->getOperand(2);
18259 Dest = N->getOperand(4);
18260 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18261 if (!Const->isOne() && !Const->isZero())
18262 return SDValue();
18263 Imm = Const->getZExtValue();
18264 } else
18265 return SDValue();
18266 }
18267
18268 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18269 if (!Int)
18270 return SDValue();
18271
18272 if (Negate)
18273 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18274
18275 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18276 return (CC == ISD::SETEQ && Imm == 0) ||
18277 (CC == ISD::SETNE && Imm == 1) ||
18278 (CC == ISD::SETLT && Imm == 1) ||
18279 (CC == ISD::SETULT && Imm == 1);
18280 };
18281
18282 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18283 return (CC == ISD::SETEQ && Imm == 1) ||
18284 (CC == ISD::SETNE && Imm == 0) ||
18285 (CC == ISD::SETGT && Imm == 0) ||
18286 (CC == ISD::SETUGT && Imm == 0) ||
18287 (CC == ISD::SETGE && Imm == 1) ||
18288 (CC == ISD::SETUGE && Imm == 1);
18289 };
18290
18291 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18292 "unsupported condition");
18293
18294 SDLoc dl(Int);
18295 SelectionDAG &DAG = DCI.DAG;
18296 SDValue Elements = Int.getOperand(2);
18297 unsigned IntOp = Int->getConstantOperandVal(1);
18298 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18299 && "expected single br user");
18300 SDNode *Br = *N->use_begin();
18301 SDValue OtherTarget = Br->getOperand(1);
18302
18303 // Update the unconditional branch to branch to the given Dest.
18304 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18305 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18306 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18307 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18308 };
18309
18310 if (IntOp == Intrinsic::test_start_loop_iterations) {
18311 SDValue Res;
18312 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18313 // We expect this 'instruction' to branch when the counter is zero.
18314 if (IsTrueIfZero(CC, Imm)) {
18315 SDValue Ops[] = {Chain, Setup, Dest};
18316 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18317 } else {
18318 // The logic is the reverse of what we need for WLS, so find the other
18319 // basic block target: the target of the following br.
18320 UpdateUncondBr(Br, Dest, DAG);
18321
18322 SDValue Ops[] = {Chain, Setup, OtherTarget};
18323 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18324 }
18325 // Update LR count to the new value
18326 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18327 // Update chain
18328 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18329 return Res;
18330 } else {
18331 SDValue Size =
18332 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18333 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18334 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18335 DAG.getVTList(MVT::i32, MVT::Other), Args);
18336 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18337
18338 // We expect this instruction to branch when the count is not zero.
18339 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18340
18341 // Update the unconditional branch to target the loop preheader if we've
18342 // found the condition has been reversed.
18343 if (Target == OtherTarget)
18344 UpdateUncondBr(Br, Dest, DAG);
18345
18346 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18347 SDValue(LoopDec.getNode(), 1), Chain);
18348
18349 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18350 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18351 }
18352 return SDValue();
18353}
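// Worked example (illustrative only): a preheader ending in
//   (brcond (setcc (test.start.loop.iterations n), 0, eq), %exit)
// satisfies IsTrueIfZero, so it is rewritten above to
//   (WLS chain, (WLSSETUP n), %exit)
// i.e. a while-loop-start that skips the loop when the trip count is zero.
// The reversed form instead retargets the following unconditional branch via
// UpdateUncondBr and passes the other block to WLS.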
18354
18355/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18356SDValue
18357 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18358 SDValue Cmp = N->getOperand(4);
18359 if (Cmp.getOpcode() != ARMISD::CMPZ)
18360 // Only looking at NE cases.
18361 return SDValue();
18362
18363 EVT VT = N->getValueType(0);
18364 SDLoc dl(N);
18365 SDValue LHS = Cmp.getOperand(0);
18366 SDValue RHS = Cmp.getOperand(1);
18367 SDValue Chain = N->getOperand(0);
18368 SDValue BB = N->getOperand(1);
18369 SDValue ARMcc = N->getOperand(2);
18370 ARMCC::CondCodes CC = (ARMCC::CondCodes)N->getConstantOperandVal(2);
18371
18372 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18373 // -> (brcond Chain BB CC CPSR Cmp)
18374 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18375 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18376 LHS->getOperand(0)->hasOneUse() &&
18377 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18378 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18379 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18380 return DAG.getNode(
18381 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18382 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18383 }
18384
18385 return SDValue();
18386}
18387
18388/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18389SDValue
18390 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18391 SDValue Cmp = N->getOperand(4);
18392 if (Cmp.getOpcode() != ARMISD::CMPZ)
18393 // Only looking at EQ and NE cases.
18394 return SDValue();
18395
18396 EVT VT = N->getValueType(0);
18397 SDLoc dl(N);
18398 SDValue LHS = Cmp.getOperand(0);
18399 SDValue RHS = Cmp.getOperand(1);
18400 SDValue FalseVal = N->getOperand(0);
18401 SDValue TrueVal = N->getOperand(1);
18402 SDValue ARMcc = N->getOperand(2);
18403 ARMCC::CondCodes CC = (ARMCC::CondCodes)N->getConstantOperandVal(2);
18404
18405 // BFI is only available on V6T2+.
18406 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18407 SDValue R = PerformCMOVToBFICombine(N, DAG);
18408 if (R)
18409 return R;
18410 }
18411
18412 // Simplify
18413 // mov r1, r0
18414 // cmp r1, x
18415 // mov r0, y
18416 // moveq r0, x
18417 // to
18418 // cmp r0, x
18419 // movne r0, y
18420 //
18421 // mov r1, r0
18422 // cmp r1, x
18423 // mov r0, x
18424 // movne r0, y
18425 // to
18426 // cmp r0, x
18427 // movne r0, y
18428 /// FIXME: Turn this into a target neutral optimization?
18429 SDValue Res;
18430 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18431 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18432 N->getOperand(3), Cmp);
18433 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18434 SDValue ARMcc;
18435 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18436 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18437 N->getOperand(3), NewCmp);
18438 }
18439
18440 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18441 // -> (cmov F T CC CPSR Cmp)
18442 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18443 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18445 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18446 LHS->getOperand(2), LHS->getOperand(3),
18447 LHS->getOperand(4));
18448 }
18449
18450 if (!VT.isInteger())
18451 return SDValue();
18452
18453 // Fold away an unnecessary CMPZ/CMOV
18454 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18455 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18456 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18457 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18458 N->getConstantOperandVal(2) == ARMCC::NE) {
18459 ARMCC::CondCodes Cond;
18460 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18461 if (N->getConstantOperandVal(2) == ARMCC::NE)
18462 Cond = ARMCC::getOppositeCondition(Cond);
18463 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18464 N->getOperand(1),
18465 DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18466 N->getOperand(3), C);
18467 }
18468 }
18469
18470 // Materialize a boolean comparison for integers so we can avoid branching.
18471 if (isNullConstant(FalseVal)) {
18472 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18473 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18474 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18475 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18476 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18477 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18478 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18479 DAG.getConstant(5, dl, MVT::i32));
18480 } else {
18481 // CMOV 0, 1, ==, (CMPZ x, y) ->
18482 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18483 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18484 //
18485 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18486 // x != y. In other words, a carry C == 1 when x == y, C == 0
18487 // otherwise.
18488 // The final UADDO_CARRY computes
18489 // x - y + (0 - (x - y)) + C == C
18490 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18491 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18492 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18493 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18494 // actually.
18495 SDValue Carry =
18496 DAG.getNode(ISD::SUB, dl, MVT::i32,
18497 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18498 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18499 }
18500 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18501 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18502 // This seems pointless but will allow us to combine it further below.
18503 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18504 SDValue Sub =
18505 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18506 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18507 Sub.getValue(1), SDValue());
18508 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18509 N->getOperand(3), CPSRGlue.getValue(1));
18510 FalseVal = Sub;
18511 }
18512 } else if (isNullConstant(TrueVal)) {
18513 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18514 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18515 // This seems pointless but will allow us to combine it further below
18516 // Note that we change == for != as this is the dual for the case above.
18517 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18518 SDValue Sub =
18519 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18520 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18521 Sub.getValue(1), SDValue());
18522 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18523 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18524 N->getOperand(3), CPSRGlue.getValue(1));
18525 FalseVal = Sub;
18526 }
18527 }
18528
18529 // On Thumb1, the DAG above may be further combined if z is a power of 2
18530 // (z == 2 ^ K).
18531 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18532 // t1 = (USUBO (SUB x, y), 1)
18533 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18534 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18535 //
18536 // This also handles the special case of comparing against zero; it's
18537 // essentially the same pattern, except there's no SUBC:
18538 // CMOV x, z, !=, (CMPZ x, 0) ->
18539 // t1 = (USUBO x, 1)
18540 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18541 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18542 const APInt *TrueConst;
18543 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18544 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18545 FalseVal.getOperand(1) == RHS) ||
18546 (FalseVal == LHS && isNullConstant(RHS))) &&
18547 (TrueConst = isPowerOf2Constant(TrueVal))) {
18548 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18549 unsigned ShiftAmount = TrueConst->logBase2();
18550 if (ShiftAmount)
18551 TrueVal = DAG.getConstant(1, dl, VT);
18552 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18553 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18554 Subc.getValue(1));
18555
18556 if (ShiftAmount)
18557 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18558 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18559 }
18560
18561 if (Res.getNode()) {
18562 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18563 // Capture demanded bits information that would be otherwise lost.
18564 if (Known.Zero == 0xfffffffe)
18565 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18566 DAG.getValueType(MVT::i1));
18567 else if (Known.Zero == 0xffffff00)
18568 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18569 DAG.getValueType(MVT::i8));
18570 else if (Known.Zero == 0xffff0000)
18571 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18572 DAG.getValueType(MVT::i16));
18573 }
18574
18575 return Res;
18576}
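// Scalar reference (added for exposition only, not used by the lowering) for
// the CLZ-based equality materialization above: countl_zero(x - y) is 32
// exactly when x == y, so a right shift by 5 yields the boolean without a
// branch.
[[maybe_unused]] static uint32_t referenceEqualViaCLZ(uint32_t X, uint32_t Y) {
  return llvm::countl_zero(X - Y) >> 5;
}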
18577
18578 static SDValue PerformBITCASTCombine(SDNode *N,
18579 TargetLowering::DAGCombinerInfo &DCI,
18580 const ARMSubtarget *ST) {
18581 SelectionDAG &DAG = DCI.DAG;
18582 SDValue Src = N->getOperand(0);
18583 EVT DstVT = N->getValueType(0);
18584
18585 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18586 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18587 EVT SrcVT = Src.getValueType();
18588 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18589 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18590 }
18591
18592 // We may have a bitcast of something that has already had this bitcast
18593 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18594 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18595 Src = Src.getOperand(0);
18596
18597 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18598 // would be generated is at least the width of the element type.
18599 EVT SrcVT = Src.getValueType();
18600 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18601 Src.getOpcode() == ARMISD::VMVNIMM ||
18602 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18603 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18604 DAG.getDataLayout().isBigEndian())
18605 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18606
18607 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18608 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18609 return R;
18610
18611 return SDValue();
18612}
18613
18614// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18615// node into stack operations after legalizeOps.
18616 static SDValue PerformMVETruncCombine(SDNode *N,
18617 TargetLowering::DAGCombinerInfo &DCI) {
18618 SelectionDAG &DAG = DCI.DAG;
18619 EVT VT = N->getValueType(0);
18620 SDLoc DL(N);
18621
18622 // MVETrunc(Undef, Undef) -> Undef
18623 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18624 return DAG.getUNDEF(VT);
18625
18626 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18627 if (N->getNumOperands() == 2 &&
18628 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18629 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18630 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18631 N->getOperand(0).getOperand(1),
18632 N->getOperand(1).getOperand(0),
18633 N->getOperand(1).getOperand(1));
18634
18635 // MVETrunc(shuffle, shuffle) -> VMOVN
18636 if (N->getNumOperands() == 2 &&
18637 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18638 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18639 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18640 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18641
18642 if (S0->getOperand(0) == S1->getOperand(0) &&
18643 S0->getOperand(1) == S1->getOperand(1)) {
18644 // Construct complete shuffle mask
18645 SmallVector<int, 8> Mask(S0->getMask());
18646 Mask.append(S1->getMask().begin(), S1->getMask().end());
18647
18648 if (isVMOVNTruncMask(Mask, VT, false))
18649 return DAG.getNode(
18650 ARMISD::VMOVN, DL, VT,
18651 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18652 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18653 DAG.getConstant(1, DL, MVT::i32));
18654 if (isVMOVNTruncMask(Mask, VT, true))
18655 return DAG.getNode(
18656 ARMISD::VMOVN, DL, VT,
18657 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18658 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18659 DAG.getConstant(1, DL, MVT::i32));
18660 }
18661 }
18662
18663 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18664 // truncate to a buildvector to allow the generic optimisations to kick in.
18665 if (all_of(N->ops(), [](SDValue Op) {
18666 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18667 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18668 (Op.getOpcode() == ISD::BITCAST &&
18669 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18670 })) {
18671 SmallVector<SDValue, 8> Extracts;
18672 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18673 SDValue O = N->getOperand(Op);
18674 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18675 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18676 DAG.getConstant(i, DL, MVT::i32));
18677 Extracts.push_back(Ext);
18678 }
18679 }
18680 return DAG.getBuildVector(VT, DL, Extracts);
18681 }
18682
18683 // If we are late in the legalization process and nothing has optimised
18684 // the trunc to anything better, lower it to a stack store and reload,
18685 // performing the truncation whilst keeping the lanes in the correct order:
18686 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18687 if (!DCI.isAfterLegalizeDAG())
18688 return SDValue();
18689
18690 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18691 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18692 int NumIns = N->getNumOperands();
18693 assert((NumIns == 2 || NumIns == 4) &&
18694 "Expected 2 or 4 inputs to an MVETrunc");
18695 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18696 if (N->getNumOperands() == 4)
18697 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18698
18699 SmallVector<SDValue> Chains;
18700 for (int I = 0; I < NumIns; I++) {
18701 SDValue Ptr = DAG.getNode(
18702 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18703 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18704 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18705 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18706 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18707 Ptr, MPI, StoreVT, Align(4));
18708 Chains.push_back(Ch);
18709 }
18710
18711 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18712 MachinePointerInfo MPI =
18713 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18714 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18715}
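// Worked example (illustrative only): an MVETRUNC of two v4i32 operands to
// v8i16 that reaches the fallback above stores each operand with a truncating
// store (VSTRH.32) at [sp] and [sp, #8] (the I * 16 / NumIns stride), then
// reloads the whole 16 bytes as one v8i16 (VLDRW.32), keeping the lanes in
// source order.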
18716
18717// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18718 static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18719 SelectionDAG &DAG) {
18720 SDValue N0 = N->getOperand(0);
18721 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18722 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18723 return SDValue();
18724
18725 EVT FromVT = LD->getMemoryVT();
18726 EVT ToVT = N->getValueType(0);
18727 if (!ToVT.isVector())
18728 return SDValue();
18729 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18730 EVT ToEltVT = ToVT.getVectorElementType();
18731 EVT FromEltVT = FromVT.getVectorElementType();
18732
18733 unsigned NumElements = 0;
18734 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18735 NumElements = 4;
18736 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18737 NumElements = 8;
18738 assert(NumElements != 0);
18739
18740 ISD::LoadExtType NewExtType =
18741 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18742 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18743 LD->getExtensionType() != ISD::EXTLOAD &&
18744 LD->getExtensionType() != NewExtType)
18745 return SDValue();
18746
18747 LLVMContext &C = *DAG.getContext();
18748 SDLoc DL(LD);
18749 // Details about the old load
18750 SDValue Ch = LD->getChain();
18751 SDValue BasePtr = LD->getBasePtr();
18752 Align Alignment = LD->getOriginalAlign();
18753 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18754 AAMDNodes AAInfo = LD->getAAInfo();
18755
18756 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18757 EVT NewFromVT = EVT::getVectorVT(
18758 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18759 EVT NewToVT = EVT::getVectorVT(
18760 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18761
18762 SmallVector<SDValue, 4> Loads;
18763 SmallVector<SDValue, 4> Chains;
18764 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18765 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18766 SDValue NewPtr =
18767 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18768
18769 SDValue NewLoad =
18770 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18771 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18772 Alignment, MMOFlags, AAInfo);
18773 Loads.push_back(NewLoad);
18774 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18775 }
18776
18777 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18778 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18779 return DAG.getMergeValues(Loads, DL);
18780}
18781
18782 // Perform combines for MVEEXT. If it has not been optimized to anything better
18783// before lowering, it gets converted to stack store and extloads performing the
18784// extend whilst still keeping the same lane ordering.
18785 static SDValue PerformMVEExtCombine(SDNode *N,
18786 TargetLowering::DAGCombinerInfo &DCI) {
18787 SelectionDAG &DAG = DCI.DAG;
18788 EVT VT = N->getValueType(0);
18789 SDLoc DL(N);
18790 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18791 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18792
18793 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18794 *DAG.getContext());
18795 auto Extend = [&](SDValue V) {
18796 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18797 return N->getOpcode() == ARMISD::MVESEXT
18798 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18799 DAG.getValueType(ExtVT))
18800 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18801 };
18802
18803 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18804 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18805 SDValue Ext = Extend(N->getOperand(0));
18806 return DAG.getMergeValues({Ext, Ext}, DL);
18807 }
18808
18809 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18810 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18811 ArrayRef<int> Mask = SVN->getMask();
18812 assert(Mask.size() == 2 * VT.getVectorNumElements());
18813 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18814 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18815 SDValue Op0 = SVN->getOperand(0);
18816 SDValue Op1 = SVN->getOperand(1);
18817
18818 auto CheckInregMask = [&](int Start, int Offset) {
18819 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18820 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18821 return false;
18822 return true;
18823 };
18824 SDValue V0 = SDValue(N, 0);
18825 SDValue V1 = SDValue(N, 1);
18826 if (CheckInregMask(0, 0))
18827 V0 = Extend(Op0);
18828 else if (CheckInregMask(0, 1))
18829 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18830 else if (CheckInregMask(0, Mask.size()))
18831 V0 = Extend(Op1);
18832 else if (CheckInregMask(0, Mask.size() + 1))
18833 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18834
18835 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18836 V1 = Extend(Op1);
18837 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18838 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18839 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18840 V1 = Extend(Op0);
18841 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18842 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18843
18844 if (V0.getNode() != N || V1.getNode() != N)
18845 return DAG.getMergeValues({V0, V1}, DL);
18846 }
18847
18848 // MVEEXT(load) -> extload, extload
18849 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18850 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18851 return L;
18852
18853 if (!DCI.isAfterLegalizeDAG())
18854 return SDValue();
18855
18856 // Lower to a stack store and reload:
18857 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18858 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18859 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18860 int NumOuts = N->getNumValues();
18861 assert((NumOuts == 2 || NumOuts == 4) &&
18862 "Expected 2 or 4 outputs to an MVEEXT");
18863 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18864 *DAG.getContext());
18865 if (N->getNumOperands() == 4)
18866 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18867
18868 MachinePointerInfo MPI =
18869 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18870 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18871 StackPtr, MPI, Align(4));
18872
18873 SmallVector<SDValue> Loads;
18874 for (int I = 0; I < NumOuts; I++) {
18875 SDValue Ptr = DAG.getNode(
18876 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18877 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18878 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18879 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18880 SDValue Load = DAG.getExtLoad(
18881 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18882 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18883 Loads.push_back(Load);
18884 }
18885
18886 return DAG.getMergeValues(Loads, DL);
18887}
18888
18889 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18890 DAGCombinerInfo &DCI) const {
18891 switch (N->getOpcode()) {
18892 default: break;
18893 case ISD::SELECT_CC:
18894 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18895 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18896 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18897 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18898 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18899 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18900 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18901 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18902 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18903 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18904 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18905 case ISD::BRCOND:
18906 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18907 case ARMISD::ADDC:
18908 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18909 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18910 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18911 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18912 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18913 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18914 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18915 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18916 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18917 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18918 case ISD::EXTRACT_VECTOR_ELT:
18919 return PerformExtractEltCombine(N, DCI, Subtarget);
18920 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
18921 case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
18922 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
18923 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18924 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18925 case ISD::FP_TO_SINT:
18926 case ISD::FP_TO_UINT:
18927 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18928 case ISD::FADD:
18929 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18930 case ISD::FMUL:
18931 return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);
18932 case ISD::INTRINSIC_WO_CHAIN:
18933 return PerformIntrinsicCombine(N, DCI);
18934 case ISD::SHL:
18935 case ISD::SRA:
18936 case ISD::SRL:
18937 return PerformShiftCombine(N, DCI, Subtarget);
18938 case ISD::SIGN_EXTEND:
18939 case ISD::ZERO_EXTEND:
18940 case ISD::ANY_EXTEND:
18941 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18942 case ISD::FP_EXTEND:
18943 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18944 case ISD::SMIN:
18945 case ISD::UMIN:
18946 case ISD::SMAX:
18947 case ISD::UMAX:
18948 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18949 case ARMISD::CMOV:
18950 return PerformCMOVCombine(N, DCI.DAG);
18951 case ARMISD::BRCOND:
18952 return PerformBRCONDCombine(N, DCI.DAG);
18953 case ARMISD::CMPZ:
18954 return PerformCMPZCombine(N, DCI.DAG);
18955 case ARMISD::CSINC:
18956 case ARMISD::CSINV:
18957 case ARMISD::CSNEG:
18958 return PerformCSETCombine(N, DCI.DAG);
18959 case ISD::LOAD:
18960 return PerformLOADCombine(N, DCI, Subtarget);
18961 case ARMISD::VLD1DUP:
18962 case ARMISD::VLD2DUP:
18963 case ARMISD::VLD3DUP:
18964 case ARMISD::VLD4DUP:
18965 return PerformVLDCombine(N, DCI);
18966 case ARMISD::BUILD_VECTOR:
18967 return PerformARMBUILD_VECTORCombine(N, DCI);
18968 case ISD::BITCAST:
18969 return PerformBITCASTCombine(N, DCI, Subtarget);
18970 case ARMISD::PREDICATE_CAST:
18971 return PerformPREDICATE_CASTCombine(N, DCI);
18972 case ARMISD::VECTOR_REG_CAST:
18973 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18974 case ARMISD::MVETRUNC:
18975 return PerformMVETruncCombine(N, DCI);
18976 case ARMISD::MVESEXT:
18977 case ARMISD::MVEZEXT:
18978 return PerformMVEExtCombine(N, DCI);
18979 case ARMISD::VCMP:
18980 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18981 case ISD::VECREDUCE_ADD:
18982 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18983 case ARMISD::VADDVs:
18984 case ARMISD::VADDVu:
18985 case ARMISD::VADDLVs:
18986 case ARMISD::VADDLVu:
18987 case ARMISD::VADDLVAs:
18988 case ARMISD::VADDLVAu:
18989 case ARMISD::VMLAVs:
18990 case ARMISD::VMLAVu:
18991 case ARMISD::VMLALVs:
18992 case ARMISD::VMLALVu:
18993 case ARMISD::VMLALVAs:
18994 case ARMISD::VMLALVAu:
18995 return PerformReduceShuffleCombine(N, DCI.DAG);
18996 case ARMISD::VMOVN:
18997 return PerformVMOVNCombine(N, DCI);
18998 case ARMISD::VQMOVNs:
18999 case ARMISD::VQMOVNu:
19000 return PerformVQMOVNCombine(N, DCI);
19001 case ARMISD::VQDMULH:
19002 return PerformVQDMULHCombine(N, DCI);
19003 case ARMISD::ASRL:
19004 case ARMISD::LSRL:
19005 case ARMISD::LSLL:
19006 return PerformLongShiftCombine(N, DCI.DAG);
19007 case ARMISD::SMULWB: {
19008 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19009 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19010 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19011 return SDValue();
19012 break;
19013 }
19014 case ARMISD::SMULWT: {
19015 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19016 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19017 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19018 return SDValue();
19019 break;
19020 }
19021 case ARMISD::SMLALBB:
19022 case ARMISD::QADD16b:
19023 case ARMISD::QSUB16b:
19024 case ARMISD::UQADD16b:
19025 case ARMISD::UQSUB16b: {
19026 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19027 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19028 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19029 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19030 return SDValue();
19031 break;
19032 }
19033 case ARMISD::SMLALBT: {
19034 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19035 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19036 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19037 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19038 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19039 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19040 return SDValue();
19041 break;
19042 }
19043 case ARMISD::SMLALTB: {
19044 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19045 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19046 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19047 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19048 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19049 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19050 return SDValue();
19051 break;
19052 }
19053 case ARMISD::SMLALTT: {
19054 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19055 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19056 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19057 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19058 return SDValue();
19059 break;
19060 }
19061 case ARMISD::QADD8b:
19062 case ARMISD::QSUB8b:
19063 case ARMISD::UQADD8b:
19064 case ARMISD::UQSUB8b: {
19065 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19066 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19067 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19068 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19069 return SDValue();
19070 break;
19071 }
19072 case ISD::INTRINSIC_VOID:
19073 case ISD::INTRINSIC_W_CHAIN:
19074 switch (N->getConstantOperandVal(1)) {
19075 case Intrinsic::arm_neon_vld1:
19076 case Intrinsic::arm_neon_vld1x2:
19077 case Intrinsic::arm_neon_vld1x3:
19078 case Intrinsic::arm_neon_vld1x4:
19079 case Intrinsic::arm_neon_vld2:
19080 case Intrinsic::arm_neon_vld3:
19081 case Intrinsic::arm_neon_vld4:
19082 case Intrinsic::arm_neon_vld2lane:
19083 case Intrinsic::arm_neon_vld3lane:
19084 case Intrinsic::arm_neon_vld4lane:
19085 case Intrinsic::arm_neon_vld2dup:
19086 case Intrinsic::arm_neon_vld3dup:
19087 case Intrinsic::arm_neon_vld4dup:
19088 case Intrinsic::arm_neon_vst1:
19089 case Intrinsic::arm_neon_vst1x2:
19090 case Intrinsic::arm_neon_vst1x3:
19091 case Intrinsic::arm_neon_vst1x4:
19092 case Intrinsic::arm_neon_vst2:
19093 case Intrinsic::arm_neon_vst3:
19094 case Intrinsic::arm_neon_vst4:
19095 case Intrinsic::arm_neon_vst2lane:
19096 case Intrinsic::arm_neon_vst3lane:
19097 case Intrinsic::arm_neon_vst4lane:
19098 return PerformVLDCombine(N, DCI);
19099 case Intrinsic::arm_mve_vld2q:
19100 case Intrinsic::arm_mve_vld4q:
19101 case Intrinsic::arm_mve_vst2q:
19102 case Intrinsic::arm_mve_vst4q:
19103 return PerformMVEVLDCombine(N, DCI);
19104 default: break;
19105 }
19106 break;
19107 }
19108 return SDValue();
19109}
19110
19111 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19112 EVT VT) const {
19113 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19114}
19115
19116 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19117 Align Alignment,
19118 MachineMemOperand::Flags,
19119 unsigned *Fast) const {
19120 // Depends what it gets converted into if the type is weird.
19121 if (!VT.isSimple())
19122 return false;
19123
19124 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19125 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19126 auto Ty = VT.getSimpleVT().SimpleTy;
19127
19128 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19129 // Unaligned access can use (for example) LRDB, LRDH, LDR
19130 if (AllowsUnaligned) {
19131 if (Fast)
19132 *Fast = Subtarget->hasV7Ops();
19133 return true;
19134 }
19135 }
19136
19137 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19138 // For any little-endian targets with neon, we can support unaligned ld/st
19139 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19140 // A big-endian target may also explicitly support unaligned accesses
19141 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19142 if (Fast)
19143 *Fast = 1;
19144 return true;
19145 }
19146 }
19147
19148 if (!Subtarget->hasMVEIntegerOps())
19149 return false;
19150
19151 // These are for predicates
19152 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19153 Ty == MVT::v2i1)) {
19154 if (Fast)
19155 *Fast = 1;
19156 return true;
19157 }
19158
19159 // These are for truncated stores/narrowing loads. They are fine so long as
19160 // the alignment is at least the size of the item being loaded
19161 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19162 Alignment >= VT.getScalarSizeInBits() / 8) {
19163 if (Fast)
19164 *Fast = true;
19165 return true;
19166 }
19167
19168 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19169 // VSTRW.U32 all store the vector register in exactly the same format, and
19170 // differ only in the range of their immediate offset field and the required
19171 // alignment. So there is always a store that can be used, regardless of
19172 // actual type.
19173 //
19174 // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
19175 // VREV64.8) pair and get the same effect. This will likely be better than
19176 // aligning the vector through the stack.
19177 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19178 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19179 Ty == MVT::v2f64) {
19180 if (Fast)
19181 *Fast = 1;
19182 return true;
19183 }
19184
19185 return false;
19186}
19187
19188
19189 EVT ARMTargetLowering::getOptimalMemOpType(
19190 const MemOp &Op, const AttributeList &FuncAttributes) const {
19191 // See if we can use NEON instructions for this...
19192 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19193 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19194 unsigned Fast;
19195 if (Op.size() >= 16 &&
19196 (Op.isAligned(Align(16)) ||
19197 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19199 Fast))) {
19200 return MVT::v2f64;
19201 } else if (Op.size() >= 8 &&
19202 (Op.isAligned(Align(8)) ||
19204 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19205 Fast))) {
19206 return MVT::f64;
19207 }
19208 }
19209
19210 // Let the target-independent logic figure it out.
19211 return MVT::Other;
19212}
19213
19214// 64-bit integers are split into their high and low parts and held in two
19215// different registers, so the trunc is free since the low register can just
19216// be used.
19217bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19218 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19219 return false;
19220 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19221 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19222 return (SrcBits == 64 && DestBits == 32);
19223}
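// For example (illustrative only): 'trunc i64 %x to i32' is free here because
// the i64 already lives in a GPR pair and the truncated value is just the low
// register of that pair; no instruction is needed.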
19224
19225 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19226 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19227 !DstVT.isInteger())
19228 return false;
19229 unsigned SrcBits = SrcVT.getSizeInBits();
19230 unsigned DestBits = DstVT.getSizeInBits();
19231 return (SrcBits == 64 && DestBits == 32);
19232}
19233
19234 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19235 if (Val.getOpcode() != ISD::LOAD)
19236 return false;
19237
19238 EVT VT1 = Val.getValueType();
19239 if (!VT1.isSimple() || !VT1.isInteger() ||
19240 !VT2.isSimple() || !VT2.isInteger())
19241 return false;
19242
19243 switch (VT1.getSimpleVT().SimpleTy) {
19244 default: break;
19245 case MVT::i1:
19246 case MVT::i8:
19247 case MVT::i16:
19248 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19249 return true;
19250 }
19251
19252 return false;
19253}
19254
19255 bool ARMTargetLowering::isFNegFree(EVT VT) const {
19256 if (!VT.isSimple())
19257 return false;
19258
19259 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19260 // negate values directly (fneg is free). So, we don't want to let the DAG
19261 // combiner rewrite fneg into xors and some other instructions. For f16 and
19262 // FullFP16 argument passing, some bitcast nodes may be introduced,
19263 // triggering this DAG combine rewrite, so we are avoiding that with this.
19264 switch (VT.getSimpleVT().SimpleTy) {
19265 default: break;
19266 case MVT::f16:
19267 return Subtarget->hasFullFP16();
19268 }
19269
19270 return false;
19271}
19272
19273/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
19274/// of the vector elements.
19275static bool areExtractExts(Value *Ext1, Value *Ext2) {
19276 auto areExtDoubled = [](Instruction *Ext) {
19277 return Ext->getType()->getScalarSizeInBits() ==
19278 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19279 };
19280
19281 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
19282 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
19283 !areExtDoubled(cast<Instruction>(Ext1)) ||
19284 !areExtDoubled(cast<Instruction>(Ext2)))
19285 return false;
19286
19287 return true;
19288}
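// For example (illustrative only): a sub whose operands are both
// sext <8 x i8> to <8 x i16> passes this check, since each extend doubles the
// element width; sinking the extends next to the sub lets NEON select a
// single VSUBL.S8 for the whole sequence.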
19289
19290/// Check if sinking \p I's operands to I's basic block is profitable, because
19291/// the operands can be folded into a target instruction, e.g.
19292/// sext/zext can be folded into vsubl.
19293 bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
19294 SmallVectorImpl<Use *> &Ops) const {
19295 if (!I->getType()->isVectorTy())
19296 return false;
19297
19298 if (Subtarget->hasNEON()) {
19299 switch (I->getOpcode()) {
19300 case Instruction::Sub:
19301 case Instruction::Add: {
19302 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19303 return false;
19304 Ops.push_back(&I->getOperandUse(0));
19305 Ops.push_back(&I->getOperandUse(1));
19306 return true;
19307 }
19308 default:
19309 return false;
19310 }
19311 }
19312
19313 if (!Subtarget->hasMVEIntegerOps())
19314 return false;
19315
19316 auto IsFMSMul = [&](Instruction *I) {
19317 if (!I->hasOneUse())
19318 return false;
19319 auto *Sub = cast<Instruction>(*I->users().begin());
19320 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19321 };
19322 auto IsFMS = [&](Instruction *I) {
19323 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19324 match(I->getOperand(1), m_FNeg(m_Value())))
19325 return true;
19326 return false;
19327 };
19328
19329 auto IsSinker = [&](Instruction *I, int Operand) {
19330 switch (I->getOpcode()) {
19331 case Instruction::Add:
19332 case Instruction::Mul:
19333 case Instruction::FAdd:
19334 case Instruction::ICmp:
19335 case Instruction::FCmp:
19336 return true;
19337 case Instruction::FMul:
19338 return !IsFMSMul(I);
19339 case Instruction::Sub:
19340 case Instruction::FSub:
19341 case Instruction::Shl:
19342 case Instruction::LShr:
19343 case Instruction::AShr:
19344 return Operand == 1;
19345 case Instruction::Call:
19346 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19347 switch (II->getIntrinsicID()) {
19348 case Intrinsic::fma:
19349 return !IsFMS(I);
19350 case Intrinsic::sadd_sat:
19351 case Intrinsic::uadd_sat:
19352 case Intrinsic::arm_mve_add_predicated:
19353 case Intrinsic::arm_mve_mul_predicated:
19354 case Intrinsic::arm_mve_qadd_predicated:
19355 case Intrinsic::arm_mve_vhadd:
19356 case Intrinsic::arm_mve_hadd_predicated:
19357 case Intrinsic::arm_mve_vqdmull:
19358 case Intrinsic::arm_mve_vqdmull_predicated:
19359 case Intrinsic::arm_mve_vqdmulh:
19360 case Intrinsic::arm_mve_qdmulh_predicated:
19361 case Intrinsic::arm_mve_vqrdmulh:
19362 case Intrinsic::arm_mve_qrdmulh_predicated:
19363 case Intrinsic::arm_mve_fma_predicated:
19364 return true;
19365 case Intrinsic::ssub_sat:
19366 case Intrinsic::usub_sat:
19367 case Intrinsic::arm_mve_sub_predicated:
19368 case Intrinsic::arm_mve_qsub_predicated:
19369 case Intrinsic::arm_mve_hsub_predicated:
19370 case Intrinsic::arm_mve_vhsub:
19371 return Operand == 1;
19372 default:
19373 return false;
19374 }
19375 }
19376 return false;
19377 default:
19378 return false;
19379 }
19380 };
19381
19382 for (auto OpIdx : enumerate(I->operands())) {
19383 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19384 // Make sure we are not already sinking this operand
19385 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19386 continue;
19387
19388 Instruction *Shuffle = Op;
19389 if (Shuffle->getOpcode() == Instruction::BitCast)
19390 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19391 // We are looking for a splat that can be sunk.
19392 if (!Shuffle ||
19393 !match(Shuffle, m_Shuffle(
19394 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
19395 m_Undef(), m_ZeroMask())))
19396 continue;
19397 if (!IsSinker(I, OpIdx.index()))
19398 continue;
19399
19400 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
19401 // and vector registers
19402 for (Use &U : Op->uses()) {
19403 Instruction *Insn = cast<Instruction>(U.getUser());
19404 if (!IsSinker(Insn, U.getOperandNo()))
19405 return false;
19406 }
19407
19408 Ops.push_back(&Shuffle->getOperandUse(0));
19409 if (Shuffle != Op)
19410 Ops.push_back(&Op->getOperandUse(0));
19411 Ops.push_back(&OpIdx.value());
19412 }
19413 return true;
19414}
19415
19416Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19417 if (!Subtarget->hasMVEIntegerOps())
19418 return nullptr;
19419 Type *SVIType = SVI->getType();
19420 Type *ScalarType = SVIType->getScalarType();
19421
19422 if (ScalarType->isFloatTy())
19423 return Type::getInt32Ty(SVIType->getContext());
19424 if (ScalarType->isHalfTy())
19425 return Type::getInt16Ty(SVIType->getContext());
19426 return nullptr;
19427}
19428
19429bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19430 EVT VT = ExtVal.getValueType();
19431
19432 if (!isTypeLegal(VT))
19433 return false;
19434
19435 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19436 if (Ld->isExpandingLoad())
19437 return false;
19438 }
19439
19440 if (Subtarget->hasMVEIntegerOps())
19441 return true;
19442
19443 // Don't create a loadext if we can fold the extension into a wide/long
19444 // instruction.
19445 // If there's more than one user instruction, the loadext is desirable no
19446 // matter what. There can be two uses by the same instruction.
19447 if (ExtVal->use_empty() ||
19448 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19449 return true;
19450
19451 SDNode *U = *ExtVal->use_begin();
19452 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19453 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19454 return false;
19455
19456 return true;
19457}
19458
19459bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19460 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19461 return false;
19462
19463 if (!isTypeLegal(EVT::getEVT(Ty1)))
19464 return false;
19465
19466 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19467
19468 // Assuming the caller doesn't have a zeroext or signext return parameter,
19469 // truncation all the way down to i1 is valid.
19470 return true;
19471}
19472
19473/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19474/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19475/// expanded to FMAs when this method returns true, otherwise fmuladd is
19476/// expanded to fmul + fadd.
19477///
19478/// ARM supports both fused and unfused multiply-add operations; we already
19479/// lower a pair of fmul and fadd to the latter so it's not clear that there
19480/// would be a gain or that the gain would be worthwhile enough to risk
19481/// correctness bugs.
19482///
19483/// For MVE, we set this to true as it helps simplify the need for some
19484/// patterns (and we don't have the non-fused floating point instruction).
19485bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19486 EVT VT) const {
19487 if (!VT.isSimple())
19488 return false;
19489
19490 switch (VT.getSimpleVT().SimpleTy) {
19491 case MVT::v4f32:
19492 case MVT::v8f16:
19493 return Subtarget->hasMVEFloatOps();
19494 case MVT::f16:
19495 return Subtarget->useFPVFMx16();
19496 case MVT::f32:
19497 return Subtarget->useFPVFMx();
19498 case MVT::f64:
19499 return Subtarget->useFPVFMx64();
19500 default:
19501 break;
19502 }
19503
19504 return false;
19505}
19506
19507static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19508 if (V < 0)
19509 return false;
19510
19511 unsigned Scale = 1;
19512 switch (VT.getSimpleVT().SimpleTy) {
19513 case MVT::i1:
19514 case MVT::i8:
19515 // Scale == 1;
19516 break;
19517 case MVT::i16:
19518 // Scale == 2;
19519 Scale = 2;
19520 break;
19521 default:
19522 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19523 // Scale == 4;
19524 Scale = 4;
19525 break;
19526 }
19527
19528 if ((V & (Scale - 1)) != 0)
19529 return false;
19530 return isUInt<5>(V / Scale);
19531}
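// A minimal standalone sketch of the Thumb-1 rule above: the immediate offset
// must be a non-negative multiple of the access size and, once scaled, must
// fit in the 5-bit offset field. The helper name and the printf driver are
// illustrative additions, not part of this file.
#include <cstdint>
#include <cstdio>

static bool thumb1OffsetIsLegal(int64_t Offset, unsigned AccessBytes) {
  if (Offset < 0 || Offset % AccessBytes != 0)
    return false;
  return (Offset / AccessBytes) < 32; // imm5 field
}

int main() {
  printf("%d\n", thumb1OffsetIsLegal(124, 4)); // 1: 124 == 31 * 4
  printf("%d\n", thumb1OffsetIsLegal(128, 4)); // 0: 32 overflows imm5
  printf("%d\n", thumb1OffsetIsLegal(62, 2));  // 1: 62 == 31 * 2
}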
19532
19533static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19534 const ARMSubtarget *Subtarget) {
19535 if (!VT.isInteger() && !VT.isFloatingPoint())
19536 return false;
19537 if (VT.isVector() && Subtarget->hasNEON())
19538 return false;
19539 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19540 !Subtarget->hasMVEFloatOps())
19541 return false;
19542
19543 bool IsNeg = false;
19544 if (V < 0) {
19545 IsNeg = true;
19546 V = -V;
19547 }
19548
19549 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19550
19551 // MVE: size * imm7
19552 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19553 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19554 case MVT::i32:
19555 case MVT::f32:
19556 return isShiftedUInt<7,2>(V);
19557 case MVT::i16:
19558 case MVT::f16:
19559 return isShiftedUInt<7,1>(V);
19560 case MVT::i8:
19561 return isUInt<7>(V);
19562 default:
19563 return false;
19564 }
19565 }
19566
19567 // half VLDR: 2 * imm8
19568 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19569 return isShiftedUInt<8, 1>(V);
19570 // VLDR and LDRD: 4 * imm8
19571 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19572 return isShiftedUInt<8, 2>(V);
19573
19574 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19575 // + imm12 or - imm8
19576 if (IsNeg)
19577 return isUInt<8>(V);
19578 return isUInt<12>(V);
19579 }
19580
19581 return false;
19582}
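// A standalone sketch of the scalar Thumb-2 ranges checked above: byte, half
// and word accesses take +imm12 or -imm8, while VLDR/LDRD take a multiple of
// 4 whose magnitude, once scaled, fits in 8 bits. Helper names below are
// assumptions for illustration only.
#include <cstdint>
#include <cstdio>

// +imm12 for positive offsets, -imm8 for negative ones.
static bool t2ScalarOffsetIsLegal(int64_t V) {
  return V < 0 ? -V < 256 : V < 4096;
}

// VLDR / LDRD style: 4 * imm8, either direction.
static bool t2VLDROffsetIsLegal(int64_t V) {
  int64_t A = V < 0 ? -V : V;
  return A % 4 == 0 && A / 4 < 256;
}

int main() {
  printf("%d %d\n", t2ScalarOffsetIsLegal(4095), t2ScalarOffsetIsLegal(-300)); // 1 0
  printf("%d %d\n", t2VLDROffsetIsLegal(-1020), t2VLDROffsetIsLegal(1024));    // 1 0
}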
19583
19584/// isLegalAddressImmediate - Return true if the integer value can be used
19585/// as the offset of the target addressing mode for load / store of the
19586/// given type.
19587static bool isLegalAddressImmediate(int64_t V, EVT VT,
19588 const ARMSubtarget *Subtarget) {
19589 if (V == 0)
19590 return true;
19591
19592 if (!VT.isSimple())
19593 return false;
19594
19595 if (Subtarget->isThumb1Only())
19596 return isLegalT1AddressImmediate(V, VT);
19597 else if (Subtarget->isThumb2())
19598 return isLegalT2AddressImmediate(V, VT, Subtarget);
19599
19600 // ARM mode.
19601 if (V < 0)
19602 V = - V;
19603 switch (VT.getSimpleVT().SimpleTy) {
19604 default: return false;
19605 case MVT::i1:
19606 case MVT::i8:
19607 case MVT::i32:
19608 // +- imm12
19609 return isUInt<12>(V);
19610 case MVT::i16:
19611 // +- imm8
19612 return isUInt<8>(V);
19613 case MVT::f32:
19614 case MVT::f64:
19615 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19616 return false;
19617 return isShiftedUInt<8, 2>(V);
19618 }
19619}
19620
19621bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19622 EVT VT) const {
19623 int Scale = AM.Scale;
19624 if (Scale < 0)
19625 return false;
19626
19627 switch (VT.getSimpleVT().SimpleTy) {
19628 default: return false;
19629 case MVT::i1:
19630 case MVT::i8:
19631 case MVT::i16:
19632 case MVT::i32:
19633 if (Scale == 1)
19634 return true;
19635 // r + r << imm
19636 Scale = Scale & ~1;
19637 return Scale == 2 || Scale == 4 || Scale == 8;
19638 case MVT::i64:
19639 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19640 // version in Thumb mode.
19641 // r + r
19642 if (Scale == 1)
19643 return true;
19644 // r * 2 (this can be lowered to r + r).
19645 if (!AM.HasBaseReg && Scale == 2)
19646 return true;
19647 return false;
19648 case MVT::isVoid:
19649 // Note, we allow "void" uses (basically, uses that aren't loads or
19650 // stores), because arm allows folding a scale into many arithmetic
19651 // operations. This should be made more precise and revisited later.
19652
19653 // Allow r << imm, but the imm has to be a multiple of two.
19654 if (Scale & 1) return false;
19655 return isPowerOf2_32(Scale);
19656 }
19657}
19658
19659bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19660 EVT VT) const {
19661 const int Scale = AM.Scale;
19662
19663 // Negative scales are not supported in Thumb1.
19664 if (Scale < 0)
19665 return false;
19666
19667 // Thumb1 addressing modes do not support register scaling excepting the
19668 // following cases:
19669 // 1. Scale == 1 means no scaling.
19670 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19671 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19672}
19673
19674/// isLegalAddressingMode - Return true if the addressing mode represented
19675/// by AM is legal for this target, for a load/store of the specified type.
19676bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19677 const AddrMode &AM, Type *Ty,
19678 unsigned AS, Instruction *I) const {
19679 EVT VT = getValueType(DL, Ty, true);
19680 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19681 return false;
19682
19683 // Can never fold addr of global into load/store.
19684 if (AM.BaseGV)
19685 return false;
19686
19687 switch (AM.Scale) {
19688 case 0: // no scale reg, must be "r+i" or "r", or "i".
19689 break;
19690 default:
19691 // ARM doesn't support any R+R*scale+imm addr modes.
19692 if (AM.BaseOffs)
19693 return false;
19694
19695 if (!VT.isSimple())
19696 return false;
19697
19698 if (Subtarget->isThumb1Only())
19699 return isLegalT1ScaledAddressingMode(AM, VT);
19700
19701 if (Subtarget->isThumb2())
19702 return isLegalT2ScaledAddressingMode(AM, VT);
19703
19704 int Scale = AM.Scale;
19705 switch (VT.getSimpleVT().SimpleTy) {
19706 default: return false;
19707 case MVT::i1:
19708 case MVT::i8:
19709 case MVT::i32:
19710 if (Scale < 0) Scale = -Scale;
19711 if (Scale == 1)
19712 return true;
19713 // r + r << imm
19714 return isPowerOf2_32(Scale & ~1);
19715 case MVT::i16:
19716 case MVT::i64:
19717 // r +/- r
19718 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19719 return true;
19720 // r * 2 (this can be lowered to r + r).
19721 if (!AM.HasBaseReg && Scale == 2)
19722 return true;
19723 return false;
19724
19725 case MVT::isVoid:
19726 // Note, we allow "void" uses (basically, uses that aren't loads or
19727 // stores), because arm allows folding a scale into many arithmetic
19728 // operations. This should be made more precise and revisited later.
19729
19730 // Allow r << imm, but the imm has to be a multiple of two.
19731 if (Scale & 1) return false;
19732 return isPowerOf2_32(Scale);
19733 }
19734 }
19735 return true;
19736}
19737
19738/// isLegalICmpImmediate - Return true if the specified immediate is legal
19739/// icmp immediate, that is the target has icmp instructions which can compare
19740/// a register against the immediate without having to materialize the
19741/// immediate into a register.
19742bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19743 // Thumb2 and ARM modes can use cmn for negative immediates.
19744 if (!Subtarget->isThumb())
19745 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19746 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19747 if (Subtarget->isThumb2())
19748 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19749 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19750 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19751 return Imm >= 0 && Imm <= 255;
19752}
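// As a standalone illustration of the cmp/cmn trick above: an immediate is
// usable if either it or its negation can be encoded as an ARM modified
// immediate (an 8-bit value rotated right by an even amount). The encoder
// below is a simplified model written for this sketch, not the ARM_AM helper
// itself.
#include <cstdint>
#include <cstdio>

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N == 0 ? V : (V << N) | (V >> (32 - N));
}

// True if V equals some imm8 rotated right by an even amount.
static bool isModifiedImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2)
    if (rotl32(V, R) <= 0xFF)
      return true;
  return false;
}

static bool icmpImmIsLegalARM(int64_t Imm) {
  return isModifiedImm((uint32_t)Imm) ||  // cmp rN, #Imm
         isModifiedImm(-(uint32_t)Imm);   // cmn rN, #-Imm
}

int main() {
  printf("%d\n", icmpImmIsLegalARM(-10));     // 1: selectable as cmn rN, #10
  printf("%d\n", icmpImmIsLegalARM(0x12345)); // 0: must be materialized first
}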
19753
19754/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19755/// *or sub* immediate, that is the target has add or sub instructions which can
19756/// add a register with the immediate without having to materialize the
19757/// immediate into a register.
19758bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19759 // Same encoding for add/sub, just flip the sign.
19760 int64_t AbsImm = std::abs(Imm);
19761 if (!Subtarget->isThumb())
19762 return ARM_AM::getSOImmVal(AbsImm) != -1;
19763 if (Subtarget->isThumb2())
19764 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19765 // Thumb1 only has 8-bit unsigned immediate.
19766 return AbsImm >= 0 && AbsImm <= 255;
19767}
19768
19769// Return false to prevent folding
19770// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19771// if the folding leads to worse code.
19772bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19773 SDValue ConstNode) const {
19774 // Let the DAGCombiner decide for vector types and large types.
19775 const EVT VT = AddNode.getValueType();
19776 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19777 return true;
19778
19779 // It is worse if c0 is legal add immediate, while c1*c0 is not
19780 // and has to be composed by at least two instructions.
19781 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19782 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19783 const int64_t C0 = C0Node->getSExtValue();
19784 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19785 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19786 return true;
19787 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19788 return false;
19789
19790 // Default to true and let the DAGCombiner decide.
19791 return true;
19792}
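// A worked example of the trade-off above, as a standalone sketch. With
// c0 = 10 and c1 = 101 the folded form needs c0*c1 = 1010; its set bits span
// nine positions, so it cannot be an ARM modified immediate (which needs all
// set bits inside one 8-bit window), and whether the fold pays off then
// depends on how many instructions 1010 costs to materialize. The helper
// below is illustrative only and ignores the even-rotation requirement.
#include <cstdint>
#include <cstdio>

// Width of the smallest cyclic window of bit positions covering every set bit.
static unsigned setBitSpan(uint32_t V) {
  if (V == 0)
    return 0;
  unsigned Best = 32;
  for (unsigned R = 0; R < 32; ++R) {
    uint32_t Rot = (R == 0) ? V : ((V << R) | (V >> (32 - R)));
    unsigned Hi = 31;
    while (!(Rot & (1u << Hi)))
      --Hi;
    unsigned Lo = 0;
    while (!(Rot & (1u << Lo)))
      ++Lo;
    if (Hi - Lo + 1 < Best)
      Best = Hi - Lo + 1;
  }
  return Best;
}

int main() {
  printf("span(10)   = %u\n", setBitSpan(10));   // 3 -> encodable immediate
  printf("span(1010) = %u\n", setBitSpan(1010)); // 9 -> needs MOVW or more
}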
19793
19794static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19795 bool isSEXTLoad, SDValue &Base,
19796 SDValue &Offset, bool &isInc,
19797 SelectionDAG &DAG) {
19798 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19799 return false;
19800
19801 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19802 // AddressingMode 3
19803 Base = Ptr->getOperand(0);
19804 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19805 int RHSC = (int)RHS->getZExtValue();
19806 if (RHSC < 0 && RHSC > -256) {
19807 assert(Ptr->getOpcode() == ISD::ADD);
19808 isInc = false;
19809 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19810 return true;
19811 }
19812 }
19813 isInc = (Ptr->getOpcode() == ISD::ADD);
19814 Offset = Ptr->getOperand(1);
19815 return true;
19816 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19817 // AddressingMode 2
19818 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19819 int RHSC = (int)RHS->getZExtValue();
19820 if (RHSC < 0 && RHSC > -0x1000) {
19821 assert(Ptr->getOpcode() == ISD::ADD);
19822 isInc = false;
19823 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19824 Base = Ptr->getOperand(0);
19825 return true;
19826 }
19827 }
19828
19829 if (Ptr->getOpcode() == ISD::ADD) {
19830 isInc = true;
19831 ARM_AM::ShiftOpc ShOpcVal=
19832 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19833 if (ShOpcVal != ARM_AM::no_shift) {
19834 Base = Ptr->getOperand(1);
19835 Offset = Ptr->getOperand(0);
19836 } else {
19837 Base = Ptr->getOperand(0);
19838 Offset = Ptr->getOperand(1);
19839 }
19840 return true;
19841 }
19842
19843 isInc = (Ptr->getOpcode() == ISD::ADD);
19844 Base = Ptr->getOperand(0);
19845 Offset = Ptr->getOperand(1);
19846 return true;
19847 }
19848
19849 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19850 return false;
19851}
19852
19853static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19854 bool isSEXTLoad, SDValue &Base,
19855 SDValue &Offset, bool &isInc,
19856 SelectionDAG &DAG) {
19857 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19858 return false;
19859
19860 Base = Ptr->getOperand(0);
19861 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19862 int RHSC = (int)RHS->getZExtValue();
19863 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19864 assert(Ptr->getOpcode() == ISD::ADD);
19865 isInc = false;
19866 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19867 return true;
19868 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19869 isInc = Ptr->getOpcode() == ISD::ADD;
19870 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19871 return true;
19872 }
19873 }
19874
19875 return false;
19876}
19877
19878static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19879 bool isSEXTLoad, bool IsMasked, bool isLE,
19880 SDValue &Base, SDValue &Offset,
19881 bool &isInc, SelectionDAG &DAG) {
19882 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19883 return false;
19884 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19885 return false;
19886
19887 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19888 // as opposed to a vldrw.32). This can allow extra addressing modes or
19889 // alignments for what is otherwise an equivalent instruction.
19890 bool CanChangeType = isLE && !IsMasked;
19891
19892 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19893 int RHSC = (int)RHS->getZExtValue();
19894
19895 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19896 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19897 assert(Ptr->getOpcode() == ISD::ADD);
19898 isInc = false;
19899 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19900 return true;
19901 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19902 isInc = Ptr->getOpcode() == ISD::ADD;
19903 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19904 return true;
19905 }
19906 return false;
19907 };
19908
19909 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19910 // (in BE/masked) type.
19911 Base = Ptr->getOperand(0);
19912 if (VT == MVT::v4i16) {
19913 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19914 return true;
19915 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19916 if (IsInRange(RHSC, 0x80, 1))
19917 return true;
19918 } else if (Alignment >= 4 &&
19919 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19920 IsInRange(RHSC, 0x80, 4))
19921 return true;
19922 else if (Alignment >= 2 &&
19923 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19924 IsInRange(RHSC, 0x80, 2))
19925 return true;
19926 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19927 return true;
19928 return false;
19929}
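// The range test above, in standalone form: MVE pre/post-indexed offsets are
// a non-zero 7-bit immediate scaled by the element size. The function name
// and driver below are illustrative assumptions.
#include <cstdio>
#include <cstdlib>

static bool mveIndexedOffsetInRange(int Offset, int EltBytes) {
  int Limit = 0x80 * EltBytes; // size * imm7
  return Offset != 0 && std::abs(Offset) < Limit && Offset % EltBytes == 0;
}

int main() {
  printf("%d\n", mveIndexedOffsetInRange(254, 2)); // 1: fits a halfword imm7
  printf("%d\n", mveIndexedOffsetInRange(256, 2)); // 0: equal to the limit
  printf("%d\n", mveIndexedOffsetInRange(3, 2));   // 0: not a multiple of 2
}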
19930
19931/// getPreIndexedAddressParts - returns true by value, base pointer and
19932/// offset pointer and addressing mode by reference if the node's address
19933/// can be legally represented as pre-indexed load / store address.
19934bool
19935ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19936 SDValue &Offset,
19937 ISD::MemIndexedMode &AM,
19938 SelectionDAG &DAG) const {
19939 if (Subtarget->isThumb1Only())
19940 return false;
19941
19942 EVT VT;
19943 SDValue Ptr;
19944 Align Alignment;
19945 bool isSEXTLoad = false;
19946 bool IsMasked = false;
19947 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19948 Ptr = LD->getBasePtr();
19949 VT = LD->getMemoryVT();
19950 Alignment = LD->getAlign();
19951 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19952 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19953 Ptr = ST->getBasePtr();
19954 VT = ST->getMemoryVT();
19955 Alignment = ST->getAlign();
19956 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19957 Ptr = LD->getBasePtr();
19958 VT = LD->getMemoryVT();
19959 Alignment = LD->getAlign();
19960 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19961 IsMasked = true;
19962 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19963 Ptr = ST->getBasePtr();
19964 VT = ST->getMemoryVT();
19965 Alignment = ST->getAlign();
19966 IsMasked = true;
19967 } else
19968 return false;
19969
19970 bool isInc;
19971 bool isLegal = false;
19972 if (VT.isVector())
19973 isLegal = Subtarget->hasMVEIntegerOps() &&
19974 getMVEIndexedAddressParts(
19975 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19976 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19977 else {
19978 if (Subtarget->isThumb2())
19979 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19980 Offset, isInc, DAG);
19981 else
19982 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19983 Offset, isInc, DAG);
19984 }
19985 if (!isLegal)
19986 return false;
19987
19988 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19989 return true;
19990}
19991
19992/// getPostIndexedAddressParts - returns true by value, base pointer and
19993/// offset pointer and addressing mode by reference if this node can be
19994/// combined with a load / store to form a post-indexed load / store.
19995bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19996 SDValue &Base,
19997 SDValue &Offset,
19998 ISD::MemIndexedMode &AM,
19999 SelectionDAG &DAG) const {
20000 EVT VT;
20001 SDValue Ptr;
20002 Align Alignment;
20003 bool isSEXTLoad = false, isNonExt;
20004 bool IsMasked = false;
20005 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20006 VT = LD->getMemoryVT();
20007 Ptr = LD->getBasePtr();
20008 Alignment = LD->getAlign();
20009 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20010 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20011 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20012 VT = ST->getMemoryVT();
20013 Ptr = ST->getBasePtr();
20014 Alignment = ST->getAlign();
20015 isNonExt = !ST->isTruncatingStore();
20016 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
20017 VT = LD->getMemoryVT();
20018 Ptr = LD->getBasePtr();
20019 Alignment = LD->getAlign();
20020 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
20021 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
20022 IsMasked = true;
20023 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
20024 VT = ST->getMemoryVT();
20025 Ptr = ST->getBasePtr();
20026 Alignment = ST->getAlign();
20027 isNonExt = !ST->isTruncatingStore();
20028 IsMasked = true;
20029 } else
20030 return false;
20031
20032 if (Subtarget->isThumb1Only()) {
20033 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
20034 // must be non-extending/truncating, i32, with an offset of 4.
20035 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
20036 if (Op->getOpcode() != ISD::ADD || !isNonExt)
20037 return false;
20038 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20039 if (!RHS || RHS->getZExtValue() != 4)
20040 return false;
20041 if (Alignment < Align(4))
20042 return false;
20043
20044 Offset = Op->getOperand(1);
20045 Base = Op->getOperand(0);
20046 AM = ISD::POST_INC;
20047 return true;
20048 }
20049
20050 bool isInc;
20051 bool isLegal = false;
20052 if (VT.isVector())
20053 isLegal = Subtarget->hasMVEIntegerOps() &&
20054 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
20055 Subtarget->isLittle(), Base, Offset,
20056 isInc, DAG);
20057 else {
20058 if (Subtarget->isThumb2())
20059 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20060 isInc, DAG);
20061 else
20062 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20063 isInc, DAG);
20064 }
20065 if (!isLegal)
20066 return false;
20067
20068 if (Ptr != Base) {
20069 // Swap base ptr and offset to catch more post-index load / store when
20070 // it's legal. In Thumb2 mode, offset must be an immediate.
20071 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
20072 !Subtarget->isThumb2())
20073 std::swap(Base, Offset);
20074
20075 // Post-indexed load / store update the base pointer.
20076 if (Ptr != Base)
20077 return false;
20078 }
20079
20080 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
20081 return true;
20082}
20083
20084void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
20085 KnownBits &Known,
20086 const APInt &DemandedElts,
20087 const SelectionDAG &DAG,
20088 unsigned Depth) const {
20089 unsigned BitWidth = Known.getBitWidth();
20090 Known.resetAll();
20091 switch (Op.getOpcode()) {
20092 default: break;
20093 case ARMISD::ADDC:
20094 case ARMISD::ADDE:
20095 case ARMISD::SUBC:
20096 case ARMISD::SUBE:
20097 // Special cases when we convert a carry to a boolean.
20098 if (Op.getResNo() == 0) {
20099 SDValue LHS = Op.getOperand(0);
20100 SDValue RHS = Op.getOperand(1);
20101 // (ADDE 0, 0, C) will give us a single bit.
20102 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20103 isNullConstant(RHS)) {
20104 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20105 return;
20106 }
20107 }
20108 break;
20109 case ARMISD::CMOV: {
20110 // Bits are known zero/one if known on the LHS and RHS.
20111 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20112 if (Known.isUnknown())
20113 return;
20114
20115 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20116 Known = Known.intersectWith(KnownRHS);
20117 return;
20118 }
20119 case ISD::INTRINSIC_W_CHAIN: {
20120 Intrinsic::ID IntID =
20121 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20122 switch (IntID) {
20123 default: return;
20124 case Intrinsic::arm_ldaex:
20125 case Intrinsic::arm_ldrex: {
20126 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20127 unsigned MemBits = VT.getScalarSizeInBits();
20128 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20129 return;
20130 }
20131 }
20132 }
20133 case ARMISD::BFI: {
20134 // Conservatively, we can recurse down the first operand
20135 // and just mask out all affected bits.
20136 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20137
20138 // The operand to BFI is already a mask suitable for removing the bits it
20139 // sets.
20140 const APInt &Mask = Op.getConstantOperandAPInt(2);
20141 Known.Zero &= Mask;
20142 Known.One &= Mask;
20143 return;
20144 }
20145 case ARMISD::VGETLANEs:
20146 case ARMISD::VGETLANEu: {
20147 const SDValue &SrcSV = Op.getOperand(0);
20148 EVT VecVT = SrcSV.getValueType();
20149 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20150 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20151 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20152 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20153 "VGETLANE index out of bounds");
20154 unsigned Idx = Pos->getZExtValue();
20155 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20156 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20157
20158 EVT VT = Op.getValueType();
20159 const unsigned DstSz = VT.getScalarSizeInBits();
20160 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20161 (void)SrcSz;
20162 assert(SrcSz == Known.getBitWidth());
20163 assert(DstSz > SrcSz);
20164 if (Op.getOpcode() == ARMISD::VGETLANEs)
20165 Known = Known.sext(DstSz);
20166 else {
20167 Known = Known.zext(DstSz);
20168 }
20169 assert(DstSz == Known.getBitWidth());
20170 break;
20171 }
20172 case ARMISD::VMOVrh: {
20173 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20174 assert(KnownOp.getBitWidth() == 16);
20175 Known = KnownOp.zext(32);
20176 break;
20177 }
20178 case ARMISD::CSINC:
20179 case ARMISD::CSINV:
20180 case ARMISD::CSNEG: {
20181 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20182 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20183
20184 // The result is either:
20185 // CSINC: KnownOp0 or KnownOp1 + 1
20186 // CSINV: KnownOp0 or ~KnownOp1
20187 // CSNEG: KnownOp0 or KnownOp1 * -1
20188 if (Op.getOpcode() == ARMISD::CSINC)
20189 KnownOp1 =
20190 KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
20191 else if (Op.getOpcode() == ARMISD::CSINV)
20192 std::swap(KnownOp1.Zero, KnownOp1.One);
20193 else if (Op.getOpcode() == ARMISD::CSNEG)
20194 KnownOp1 = KnownBits::mul(
20195 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20196
20197 Known = KnownOp0.intersectWith(KnownOp1);
20198 break;
20199 }
20200 }
20201}
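// A standalone sketch of the intersection used above for conditional moves:
// the result may be either operand, so only bits known identically in both
// survive. The struct and driver below are illustrative, not the real
// KnownBits class.
#include <cstdint>
#include <cstdio>

struct KnownBitsSketch {
  uint32_t Zero; // bits known to be 0
  uint32_t One;  // bits known to be 1
};

static KnownBitsSketch intersect(KnownBitsSketch A, KnownBitsSketch B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  // Operand 0 is known to be exactly 0x0000FF00, operand 1 exactly 0x0000F0F0.
  KnownBitsSketch Op0{~0x0000FF00u, 0x0000FF00u};
  KnownBitsSketch Op1{~0x0000F0F0u, 0x0000F0F0u};
  KnownBitsSketch K = intersect(Op0, Op1);
  printf("known one:  0x%08x\n", K.One);  // 0x0000f000
  printf("known zero: 0x%08x\n", K.Zero); // 0xffff000f
}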
20202
20203bool ARMTargetLowering::targetShrinkDemandedConstant(
20204 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20205 TargetLoweringOpt &TLO) const {
20206 // Delay optimization, so we don't have to deal with illegal types, or block
20207 // optimizations.
20208 if (!TLO.LegalOps)
20209 return false;
20210
20211 // Only optimize AND for now.
20212 if (Op.getOpcode() != ISD::AND)
20213 return false;
20214
20215 EVT VT = Op.getValueType();
20216
20217 // Ignore vectors.
20218 if (VT.isVector())
20219 return false;
20220
20221 assert(VT == MVT::i32 && "Unexpected integer type");
20222
20223 // Make sure the RHS really is a constant.
20224 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20225 if (!C)
20226 return false;
20227
20228 unsigned Mask = C->getZExtValue();
20229
20230 unsigned Demanded = DemandedBits.getZExtValue();
20231 unsigned ShrunkMask = Mask & Demanded;
20232 unsigned ExpandedMask = Mask | ~Demanded;
20233
20234 // If the mask is all zeros, let the target-independent code replace the
20235 // result with zero.
20236 if (ShrunkMask == 0)
20237 return false;
20238
20239 // If the mask is all ones, erase the AND. (Currently, the target-independent
20240 // code won't do this, so we have to do it explicitly to avoid an infinite
20241 // loop in obscure cases.)
20242 if (ExpandedMask == ~0U)
20243 return TLO.CombineTo(Op, Op.getOperand(0));
20244
20245 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20246 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20247 };
20248 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20249 if (NewMask == Mask)
20250 return true;
20251 SDLoc DL(Op);
20252 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20253 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20254 return TLO.CombineTo(Op, NewOp);
20255 };
20256
20257 // Prefer uxtb mask.
20258 if (IsLegalMask(0xFF))
20259 return UseMask(0xFF);
20260
20261 // Prefer uxth mask.
20262 if (IsLegalMask(0xFFFF))
20263 return UseMask(0xFFFF);
20264
20265 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20266 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20267 if (ShrunkMask < 256)
20268 return UseMask(ShrunkMask);
20269
20270 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20271 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20272 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20273 return UseMask(ExpandedMask);
20274
20275 // Potential improvements:
20276 //
20277 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20278 // We could try to prefer Thumb1 immediates which can be lowered to a
20279 // two-instruction sequence.
20280 // We could try to recognize more legal ARM/Thumb2 immediates here.
20281
20282 return false;
20283}
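// A standalone sketch of the constant shrinking above: given an AND mask and
// the demanded bits, any candidate between ShrunkMask and ExpandedMask is
// equivalent, so prefer 0xFF / 0xFFFF (uxtb / uxth) or a small immediate.
// The selection below is simplified and for illustration only.
#include <cstdint>
#include <cstdio>

static uint32_t pickAndMask(uint32_t Mask, uint32_t Demanded) {
  uint32_t Shrunk = Mask & Demanded;
  uint32_t Expanded = Mask | ~Demanded;
  auto Legal = [&](uint32_t Cand) {
    return (Shrunk & Cand) == Shrunk && (~Expanded & Cand) == 0;
  };
  if (Legal(0xFF))
    return 0xFF;
  if (Legal(0xFFFF))
    return 0xFFFF;
  return Shrunk < 256 ? Shrunk : Mask; // otherwise keep the original mask
}

int main() {
  // Only the low byte is demanded, so an awkward mask becomes uxtb-able.
  printf("0x%x\n", pickAndMask(0x00FFF0FF, 0x000000FF)); // 0xff
}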
20284
20285bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20286 SDValue Op, const APInt &OriginalDemandedBits,
20287 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20288 unsigned Depth) const {
20289 unsigned Opc = Op.getOpcode();
20290
20291 switch (Opc) {
20292 case ARMISD::ASRL:
20293 case ARMISD::LSRL: {
20294 // If this is result 0 and the other result is unused, see if the demand
20295 // bits allow us to shrink this long shift into a standard small shift in
20296 // the opposite direction.
20297 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20298 isa<ConstantSDNode>(Op->getOperand(2))) {
20299 unsigned ShAmt = Op->getConstantOperandVal(2);
20300 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20301 << (32 - ShAmt)))
20302 return TLO.CombineTo(
20303 Op, TLO.DAG.getNode(
20304 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20305 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20306 }
20307 break;
20308 }
20309 case ARMISD::VBICIMM: {
20310 SDValue Op0 = Op.getOperand(0);
20311 unsigned ModImm = Op.getConstantOperandVal(1);
20312 unsigned EltBits = 0;
20313 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20314 if ((OriginalDemandedBits & Mask) == 0)
20315 return TLO.CombineTo(Op, Op0);
20316 }
20317 }
20318
20319 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20320 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20321}
20322
20323//===----------------------------------------------------------------------===//
20324// ARM Inline Assembly Support
20325//===----------------------------------------------------------------------===//
20326
20327bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20328 // Looking for "rev" which is V6+.
20329 if (!Subtarget->hasV6Ops())
20330 return false;
20331
20332 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20333 StringRef AsmStr = IA->getAsmString();
20334 SmallVector<StringRef, 4> AsmPieces;
20335 SplitString(AsmStr, AsmPieces, ";\n");
20336
20337 switch (AsmPieces.size()) {
20338 default: return false;
20339 case 1:
20340 AsmStr = AsmPieces[0];
20341 AsmPieces.clear();
20342 SplitString(AsmStr, AsmPieces, " \t,");
20343
20344 // rev $0, $1
20345 if (AsmPieces.size() == 3 &&
20346 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20347 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20348 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20349 if (Ty && Ty->getBitWidth() == 32)
20350 return IntrinsicLowering::LowerToByteSwap(CI);
20351 }
20352 break;
20353 }
20354
20355 return false;
20356}
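// Illustrative use of the hook above (not part of this file): it matches
// exactly this shape of single-statement inline assembly with the "=l,l"
// constraint string and, on ARMv6 or later, rewrites it into the generic
// 32-bit byte-swap so it can be optimised like bswap. The fallback builtin is
// assumed to be GCC/Clang's __builtin_bswap32.
#include <cstdint>

static inline uint32_t byteswap_via_rev(uint32_t X) {
#if defined(__arm__)
  uint32_t Out;
  asm("rev %0, %1" : "=l"(Out) : "l"(X)); // becomes "rev $0, $1" with "=l,l"
  return Out;
#else
  return __builtin_bswap32(X);
#endif
}

int main() { return byteswap_via_rev(0x11223344u) == 0x44332211u ? 0 : 1; }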
20357
20358const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20359 // At this point, we have to lower this constraint to something else, so we
20360 // lower it to an "r" or "w". However, by doing this we will force the result
20361 // to be in register, while the X constraint is much more permissive.
20362 //
20363 // Although we are correct (we are free to emit anything, without
20364 // constraints), we might break use cases that would expect us to be more
20365 // efficient and emit something else.
20366 if (!Subtarget->hasVFP2Base())
20367 return "r";
20368 if (ConstraintVT.isFloatingPoint())
20369 return "w";
20370 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20371 (ConstraintVT.getSizeInBits() == 64 ||
20372 ConstraintVT.getSizeInBits() == 128))
20373 return "w";
20374
20375 return "r";
20376}
20377
20378/// getConstraintType - Given a constraint letter, return the type of
20379/// constraint it is for this target.
20380ARMTargetLowering::ConstraintType
20381ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20382 unsigned S = Constraint.size();
20383 if (S == 1) {
20384 switch (Constraint[0]) {
20385 default: break;
20386 case 'l': return C_RegisterClass;
20387 case 'w': return C_RegisterClass;
20388 case 'h': return C_RegisterClass;
20389 case 'x': return C_RegisterClass;
20390 case 't': return C_RegisterClass;
20391 case 'j': return C_Immediate; // Constant for movw.
20392 // An address with a single base register. Due to the way we
20393 // currently handle addresses it is the same as an 'r' memory constraint.
20394 case 'Q': return C_Memory;
20395 }
20396 } else if (S == 2) {
20397 switch (Constraint[0]) {
20398 default: break;
20399 case 'T': return C_RegisterClass;
20400 // All 'U+' constraints are addresses.
20401 case 'U': return C_Memory;
20402 }
20403 }
20404 return TargetLowering::getConstraintType(Constraint);
20405}
20406
20407/// Examine constraint type and operand type and determine a weight value.
20408/// This object must already have been set up with the operand type
20409/// and the current alternative constraint selected.
20410TargetLowering::ConstraintWeight
20411ARMTargetLowering::getSingleConstraintMatchWeight(
20412 AsmOperandInfo &info, const char *constraint) const {
20413 ConstraintWeight weight = CW_Invalid;
20414 Value *CallOperandVal = info.CallOperandVal;
20415 // If we don't have a value, we can't do a match,
20416 // but allow it at the lowest weight.
20417 if (!CallOperandVal)
20418 return CW_Default;
20419 Type *type = CallOperandVal->getType();
20420 // Look at the constraint type.
20421 switch (*constraint) {
20422 default:
20423 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20424 break;
20425 case 'l':
20426 if (type->isIntegerTy()) {
20427 if (Subtarget->isThumb())
20428 weight = CW_SpecificReg;
20429 else
20430 weight = CW_Register;
20431 }
20432 break;
20433 case 'w':
20434 if (type->isFloatingPointTy())
20435 weight = CW_Register;
20436 break;
20437 }
20438 return weight;
20439}
20440
20441using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20442
20443RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20444 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20445 switch (Constraint.size()) {
20446 case 1:
20447 // GCC ARM Constraint Letters
20448 switch (Constraint[0]) {
20449 case 'l': // Low regs or general regs.
20450 if (Subtarget->isThumb())
20451 return RCPair(0U, &ARM::tGPRRegClass);
20452 return RCPair(0U, &ARM::GPRRegClass);
20453 case 'h': // High regs or no regs.
20454 if (Subtarget->isThumb())
20455 return RCPair(0U, &ARM::hGPRRegClass);
20456 break;
20457 case 'r':
20458 if (Subtarget->isThumb1Only())
20459 return RCPair(0U, &ARM::tGPRRegClass);
20460 return RCPair(0U, &ARM::GPRRegClass);
20461 case 'w':
20462 if (VT == MVT::Other)
20463 break;
20464 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20465 return RCPair(0U, &ARM::SPRRegClass);
20466 if (VT.getSizeInBits() == 64)
20467 return RCPair(0U, &ARM::DPRRegClass);
20468 if (VT.getSizeInBits() == 128)
20469 return RCPair(0U, &ARM::QPRRegClass);
20470 break;
20471 case 'x':
20472 if (VT == MVT::Other)
20473 break;
20474 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20475 return RCPair(0U, &ARM::SPR_8RegClass);
20476 if (VT.getSizeInBits() == 64)
20477 return RCPair(0U, &ARM::DPR_8RegClass);
20478 if (VT.getSizeInBits() == 128)
20479 return RCPair(0U, &ARM::QPR_8RegClass);
20480 break;
20481 case 't':
20482 if (VT == MVT::Other)
20483 break;
20484 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20485 return RCPair(0U, &ARM::SPRRegClass);
20486 if (VT.getSizeInBits() == 64)
20487 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20488 if (VT.getSizeInBits() == 128)
20489 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20490 break;
20491 }
20492 break;
20493
20494 case 2:
20495 if (Constraint[0] == 'T') {
20496 switch (Constraint[1]) {
20497 default:
20498 break;
20499 case 'e':
20500 return RCPair(0U, &ARM::tGPREvenRegClass);
20501 case 'o':
20502 return RCPair(0U, &ARM::tGPROddRegClass);
20503 }
20504 }
20505 break;
20506
20507 default:
20508 break;
20509 }
20510
20511 if (StringRef("{cc}").equals_insensitive(Constraint))
20512 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20513
20514 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20515}
20516
20517/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20518/// vector. If it is invalid, don't add anything to Ops.
20519void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20520 StringRef Constraint,
20521 std::vector<SDValue> &Ops,
20522 SelectionDAG &DAG) const {
20523 SDValue Result;
20524
20525 // Currently only support length 1 constraints.
20526 if (Constraint.size() != 1)
20527 return;
20528
20529 char ConstraintLetter = Constraint[0];
20530 switch (ConstraintLetter) {
20531 default: break;
20532 case 'j':
20533 case 'I': case 'J': case 'K': case 'L':
20534 case 'M': case 'N': case 'O':
20535 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20536 if (!C)
20537 return;
20538
20539 int64_t CVal64 = C->getSExtValue();
20540 int CVal = (int) CVal64;
20541 // None of these constraints allow values larger than 32 bits. Check
20542 // that the value fits in an int.
20543 if (CVal != CVal64)
20544 return;
20545
20546 switch (ConstraintLetter) {
20547 case 'j':
20548 // Constant suitable for movw, must be between 0 and
20549 // 65535.
20550 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20551 if (CVal >= 0 && CVal <= 65535)
20552 break;
20553 return;
20554 case 'I':
20555 if (Subtarget->isThumb1Only()) {
20556 // This must be a constant between 0 and 255, for ADD
20557 // immediates.
20558 if (CVal >= 0 && CVal <= 255)
20559 break;
20560 } else if (Subtarget->isThumb2()) {
20561 // A constant that can be used as an immediate value in a
20562 // data-processing instruction.
20563 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20564 break;
20565 } else {
20566 // A constant that can be used as an immediate value in a
20567 // data-processing instruction.
20568 if (ARM_AM::getSOImmVal(CVal) != -1)
20569 break;
20570 }
20571 return;
20572
20573 case 'J':
20574 if (Subtarget->isThumb1Only()) {
20575 // This must be a constant between -255 and -1, for negated ADD
20576 // immediates. This can be used in GCC with an "n" modifier that
20577 // prints the negated value, for use with SUB instructions. It is
20578 // not useful otherwise but is implemented for compatibility.
20579 if (CVal >= -255 && CVal <= -1)
20580 break;
20581 } else {
20582 // This must be a constant between -4095 and 4095. It is not clear
20583 // what this constraint is intended for. Implemented for
20584 // compatibility with GCC.
20585 if (CVal >= -4095 && CVal <= 4095)
20586 break;
20587 }
20588 return;
20589
20590 case 'K':
20591 if (Subtarget->isThumb1Only()) {
20592 // A 32-bit value where only one byte has a nonzero value. Exclude
20593 // zero to match GCC. This constraint is used by GCC internally for
20594 // constants that can be loaded with a move/shift combination.
20595 // It is not useful otherwise but is implemented for compatibility.
20596 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20597 break;
20598 } else if (Subtarget->isThumb2()) {
20599 // A constant whose bitwise inverse can be used as an immediate
20600 // value in a data-processing instruction. This can be used in GCC
20601 // with a "B" modifier that prints the inverted value, for use with
20602 // BIC and MVN instructions. It is not useful otherwise but is
20603 // implemented for compatibility.
20604 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20605 break;
20606 } else {
20607 // A constant whose bitwise inverse can be used as an immediate
20608 // value in a data-processing instruction. This can be used in GCC
20609 // with a "B" modifier that prints the inverted value, for use with
20610 // BIC and MVN instructions. It is not useful otherwise but is
20611 // implemented for compatibility.
20612 if (ARM_AM::getSOImmVal(~CVal) != -1)
20613 break;
20614 }
20615 return;
20616
20617 case 'L':
20618 if (Subtarget->isThumb1Only()) {
20619 // This must be a constant between -7 and 7,
20620 // for 3-operand ADD/SUB immediate instructions.
20621 if (CVal >= -7 && CVal < 7)
20622 break;
20623 } else if (Subtarget->isThumb2()) {
20624 // A constant whose negation can be used as an immediate value in a
20625 // data-processing instruction. This can be used in GCC with an "n"
20626 // modifier that prints the negated value, for use with SUB
20627 // instructions. It is not useful otherwise but is implemented for
20628 // compatibility.
20629 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20630 break;
20631 } else {
20632 // A constant whose negation can be used as an immediate value in a
20633 // data-processing instruction. This can be used in GCC with an "n"
20634 // modifier that prints the negated value, for use with SUB
20635 // instructions. It is not useful otherwise but is implemented for
20636 // compatibility.
20637 if (ARM_AM::getSOImmVal(-CVal) != -1)
20638 break;
20639 }
20640 return;
20641
20642 case 'M':
20643 if (Subtarget->isThumb1Only()) {
20644 // This must be a multiple of 4 between 0 and 1020, for
20645 // ADD sp + immediate.
20646 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20647 break;
20648 } else {
20649 // A power of two or a constant between 0 and 32. This is used in
20650 // GCC for the shift amount on shifted register operands, but it is
20651 // useful in general for any shift amounts.
20652 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20653 break;
20654 }
20655 return;
20656
20657 case 'N':
20658 if (Subtarget->isThumb1Only()) {
20659 // This must be a constant between 0 and 31, for shift amounts.
20660 if (CVal >= 0 && CVal <= 31)
20661 break;
20662 }
20663 return;
20664
20665 case 'O':
20666 if (Subtarget->isThumb1Only()) {
20667 // This must be a multiple of 4 between -508 and 508, for
20668 // ADD/SUB sp = sp + immediate.
20669 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20670 break;
20671 }
20672 return;
20673 }
20674 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20675 break;
20676 }
20677
20678 if (Result.getNode()) {
20679 Ops.push_back(Result);
20680 return;
20681 }
20682 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20683}
20684
20685static RTLIB::Libcall getDivRemLibcall(
20686 const SDNode *N, MVT::SimpleValueType SVT) {
20687 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20688 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20689 "Unhandled Opcode in getDivRemLibcall");
20690 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20691 N->getOpcode() == ISD::SREM;
20692 RTLIB::Libcall LC;
20693 switch (SVT) {
20694 default: llvm_unreachable("Unexpected request for libcall!");
20695 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20696 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20697 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20698 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20699 }
20700 return LC;
20701}
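// A standalone sketch of where the selected libcalls usually end up: on AEABI
// targets the divrem helpers typically resolve to the __aeabi_* routines,
// which return quotient and remainder together. The helper name mapping below
// is an illustrative assumption, not code from this file.
#include <cstdio>

static const char *divRemHelperName(unsigned Bits, bool Signed) {
  switch (Bits) {
  case 32: return Signed ? "__aeabi_idivmod" : "__aeabi_uidivmod";
  case 64: return Signed ? "__aeabi_ldivmod" : "__aeabi_uldivmod";
  default: return "<other>";
  }
}

int main() {
  printf("%s\n", divRemHelperName(32, true));  // __aeabi_idivmod
  printf("%s\n", divRemHelperName(64, false)); // __aeabi_uldivmod
}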
20702
20703static TargetLowering::ArgListTy getDivRemArgList(
20704 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20705 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20706 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20707 "Unhandled Opcode in getDivRemArgList");
20708 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20709 N->getOpcode() == ISD::SREM;
20710 TargetLowering::ArgListTy Args;
20711 TargetLowering::ArgListEntry Entry;
20712 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20713 EVT ArgVT = N->getOperand(i).getValueType();
20714 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20715 Entry.Node = N->getOperand(i);
20716 Entry.Ty = ArgTy;
20717 Entry.IsSExt = isSigned;
20718 Entry.IsZExt = !isSigned;
20719 Args.push_back(Entry);
20720 }
20721 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20722 std::swap(Args[0], Args[1]);
20723 return Args;
20724}
20725
20726SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20727 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20728 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20729 Subtarget->isTargetWindows()) &&
20730 "Register-based DivRem lowering only");
20731 unsigned Opcode = Op->getOpcode();
20732 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20733 "Invalid opcode for Div/Rem lowering");
20734 bool isSigned = (Opcode == ISD::SDIVREM);
20735 EVT VT = Op->getValueType(0);
20736 SDLoc dl(Op);
20737
20738 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20739 SmallVector<SDValue> Result;
20740 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20741 SDValue Res0 =
20742 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20743 SDValue Res1 =
20744 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20745 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20746 {Res0, Res1});
20747 }
20748 }
20749
20750 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20751
20752 // If the target has hardware divide, use divide + multiply + subtract:
20753 // div = a / b
20754 // rem = a - b * div
20755 // return {div, rem}
20756 // This should be lowered into UDIV/SDIV + MLS later on.
20757 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20758 : Subtarget->hasDivideInARMMode();
20759 if (hasDivide && Op->getValueType(0).isSimple() &&
20760 Op->getSimpleValueType(0) == MVT::i32) {
20761 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20762 const SDValue Dividend = Op->getOperand(0);
20763 const SDValue Divisor = Op->getOperand(1);
20764 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20765 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20766 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20767
20768 SDValue Values[2] = {Div, Rem};
20769 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20770 }
20771
20772 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20773 VT.getSimpleVT().SimpleTy);
20774 SDValue InChain = DAG.getEntryNode();
20775
20776 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20777 DAG.getContext(),
20778 Subtarget);
20779
20780 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20781 getPointerTy(DAG.getDataLayout()));
20782
20783 Type *RetTy = StructType::get(Ty, Ty);
20784
20785 if (Subtarget->isTargetWindows())
20786 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20787
20788 CallLoweringInfo CLI(DAG);
20789 CLI.setDebugLoc(dl).setChain(InChain)
20790 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20791 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
20792
20793 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20794 return CallInfo.first;
20795}
20796
20797// Lowers REM using divmod helpers
20798// see RTABI section 4.2/4.3
20799SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20800 EVT VT = N->getValueType(0);
20801
20802 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20803 SmallVector<SDValue> Result;
20804 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20805 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20806 Result[0], Result[1]);
20807 }
20808
20809 // Build return types (div and rem)
20810 std::vector<Type*> RetTyParams;
20811 Type *RetTyElement;
20812
20813 switch (VT.getSimpleVT().SimpleTy) {
20814 default: llvm_unreachable("Unexpected request for libcall!");
20815 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20816 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20817 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20818 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20819 }
20820
20821 RetTyParams.push_back(RetTyElement);
20822 RetTyParams.push_back(RetTyElement);
20823 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20824 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20825
20826 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20827 SimpleTy);
20828 SDValue InChain = DAG.getEntryNode();
20829 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20830 Subtarget);
20831 bool isSigned = N->getOpcode() == ISD::SREM;
20832 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20833 getPointerTy(DAG.getDataLayout()));
20834
20835 if (Subtarget->isTargetWindows())
20836 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20837
20838 // Lower call
20839 CallLoweringInfo CLI(DAG);
20840 CLI.setChain(InChain)
20841 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20842 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
20843 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20844
20845 // Return second (rem) result operand (first contains div)
20846 SDNode *ResNode = CallResult.first.getNode();
20847 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20848 return ResNode->getOperand(1);
20849}
20850
20851SDValue
20852ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20853 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20854 SDLoc DL(Op);
20855
20856 // Get the inputs.
20857 SDValue Chain = Op.getOperand(0);
20858 SDValue Size = Op.getOperand(1);
20859
20860 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20861 "no-stack-arg-probe")) {
20862 MaybeAlign Align =
20863 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20864 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20865 Chain = SP.getValue(1);
20866 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20867 if (Align)
20868 SP =
20869 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20870 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20871 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20872 SDValue Ops[2] = { SP, Chain };
20873 return DAG.getMergeValues(Ops, DL);
20874 }
20875
20876 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20877 DAG.getConstant(2, DL, MVT::i32));
20878
20879 SDValue Glue;
20880 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20881 Glue = Chain.getValue(1);
20882
20883 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20884 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20885
20886 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20887 Chain = NewSP.getValue(1);
20888
20889 SDValue Ops[2] = { NewSP, Chain };
20890 return DAG.getMergeValues(Ops, DL);
20891}
20892
20893SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20894 bool IsStrict = Op->isStrictFPOpcode();
20895 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20896 const unsigned DstSz = Op.getValueType().getSizeInBits();
20897 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20898 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20899 "Unexpected type for custom-lowering FP_EXTEND");
20900
20901 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20902 "With both FP DP and 16, any FP conversion is legal!");
20903
20904 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20905 "With FP16, 16 to 32 conversion is legal!");
20906
20907 // Converting from 32 -> 64 is valid if we have FP64.
20908 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20909 // FIXME: Remove this when we have strict fp instruction selection patterns
20910 if (IsStrict) {
20911 SDLoc Loc(Op);
20912 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20913 Loc, Op.getValueType(), SrcVal);
20914 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20915 }
20916 return Op;
20917 }
20918
20919 // Either we are converting from 16 -> 64, without FP16 and/or
20920 // FP.double-precision or without Armv8-fp. So we must do it in two
20921 // steps.
20922 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20923 // without FP16. So we must do a function call.
20924 SDLoc Loc(Op);
20925 RTLIB::Libcall LC;
20926 MakeLibCallOptions CallOptions;
20927 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20928 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20929 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20930 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20931 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20932 if (Supported) {
20933 if (IsStrict) {
20934 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20935 {DstVT, MVT::Other}, {Chain, SrcVal});
20936 Chain = SrcVal.getValue(1);
20937 } else {
20938 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20939 }
20940 } else {
20941 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20942 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20943 "Unexpected type for custom-lowering FP_EXTEND");
20944 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20945 Loc, Chain);
20946 }
20947 }
20948
20949 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20950}
20951
20952SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20953 bool IsStrict = Op->isStrictFPOpcode();
20954
20955 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20956 EVT SrcVT = SrcVal.getValueType();
20957 EVT DstVT = Op.getValueType();
20958 const unsigned DstSz = Op.getValueType().getSizeInBits();
20959 const unsigned SrcSz = SrcVT.getSizeInBits();
20960 (void)DstSz;
20961 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20962 "Unexpected type for custom-lowering FP_ROUND");
20963
20964 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20965 "With both FP DP and 16, any FP conversion is legal!");
20966
20967 SDLoc Loc(Op);
20968
20969 // Instruction from 32 -> 16 if hasFP16 is valid
20970 if (SrcSz == 32 && Subtarget->hasFP16())
20971 return Op;
20972
20973 // Lib call from 32 -> 16 / 64 -> [32, 16]
20974 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20975 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20976 "Unexpected type for custom-lowering FP_ROUND");
20977 MakeLibCallOptions CallOptions;
20978 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20979 SDValue Result;
20980 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20981 Loc, Chain);
20982 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20983}
20984
20985bool
20986ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20987 // The ARM target isn't yet aware of offsets.
20988 return false;
20989}
20990
20991 bool ARM::isBitFieldInvertedMask(unsigned v) {
20992 if (v == 0xffffffff)
20993 return false;
20994
20995 // there can be 1's on either or both "outsides", all the "inside"
20996 // bits must be 0's
20997 return isShiftedMask_32(~v);
20998}
20999
21000/// isFPImmLegal - Returns true if the target can instruction select the
21001/// specified FP immediate natively. If false, the legalizer will
21002/// materialize the FP immediate as a load from a constant pool.
21003 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
21004 bool ForCodeSize) const {
21005 if (!Subtarget->hasVFP3Base())
21006 return false;
21007 if (VT == MVT::f16 && Subtarget->hasFullFP16())
21008 return ARM_AM::getFP16Imm(Imm) != -1;
21009 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
21010 ARM_AM::getFP32FP16Imm(Imm) != -1)
21011 return true;
21012 if (VT == MVT::f32)
21013 return ARM_AM::getFP32Imm(Imm) != -1;
21014 if (VT == MVT::f64 && Subtarget->hasFP64())
21015 return ARM_AM::getFP64Imm(Imm) != -1;
21016 return false;
21017}
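// Usage sketch (demoIsEncodableFP32 is a hypothetical helper, not part of the
// lowering): VMOV immediates are +/-m * 2^e with a 4-bit mantissa and a 3-bit
// exponent, so values such as 1.0f, 2.0f or -2.5f are encodable while 0.1f is
// not and would instead be materialized from the constant pool.
static bool demoIsEncodableFP32(float F) {
  return ARM_AM::getFP32Imm(APFloat(F)) != -1;
}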
21018
21019/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
21020/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
21021/// specified in the intrinsic calls.
21022 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
21023 const CallInst &I,
21024 MachineFunction &MF,
21025 unsigned Intrinsic) const {
21026 switch (Intrinsic) {
21027 case Intrinsic::arm_neon_vld1:
21028 case Intrinsic::arm_neon_vld2:
21029 case Intrinsic::arm_neon_vld3:
21030 case Intrinsic::arm_neon_vld4:
21031 case Intrinsic::arm_neon_vld2lane:
21032 case Intrinsic::arm_neon_vld3lane:
21033 case Intrinsic::arm_neon_vld4lane:
21034 case Intrinsic::arm_neon_vld2dup:
21035 case Intrinsic::arm_neon_vld3dup:
21036 case Intrinsic::arm_neon_vld4dup: {
21038 // Conservatively set memVT to the entire set of vectors loaded.
21039 auto &DL = I.getDataLayout();
21040 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21041 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21042 Info.ptrVal = I.getArgOperand(0);
21043 Info.offset = 0;
21044 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21045 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21046 // volatile loads with NEON intrinsics not supported
21048 return true;
21049 }
21050 case Intrinsic::arm_neon_vld1x2:
21051 case Intrinsic::arm_neon_vld1x3:
21052 case Intrinsic::arm_neon_vld1x4: {
21054 // Conservatively set memVT to the entire set of vectors loaded.
21055 auto &DL = I.getDataLayout();
21056 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21057 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21058 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
21059 Info.offset = 0;
21060 Info.align.reset();
21061 // volatile loads with NEON intrinsics not supported
21063 return true;
21064 }
21065 case Intrinsic::arm_neon_vst1:
21066 case Intrinsic::arm_neon_vst2:
21067 case Intrinsic::arm_neon_vst3:
21068 case Intrinsic::arm_neon_vst4:
21069 case Intrinsic::arm_neon_vst2lane:
21070 case Intrinsic::arm_neon_vst3lane:
21071 case Intrinsic::arm_neon_vst4lane: {
21073 // Conservatively set memVT to the entire set of vectors stored.
21074 auto &DL = I.getDataLayout();
21075 unsigned NumElts = 0;
21076 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21077 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21078 if (!ArgTy->isVectorTy())
21079 break;
21080 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21081 }
21082 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21083 Info.ptrVal = I.getArgOperand(0);
21084 Info.offset = 0;
21085 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21086 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21087 // volatile stores with NEON intrinsics not supported
21089 return true;
21090 }
21091 case Intrinsic::arm_neon_vst1x2:
21092 case Intrinsic::arm_neon_vst1x3:
21093 case Intrinsic::arm_neon_vst1x4: {
21095 // Conservatively set memVT to the entire set of vectors stored.
21096 auto &DL = I.getDataLayout();
21097 unsigned NumElts = 0;
21098 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21099 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21100 if (!ArgTy->isVectorTy())
21101 break;
21102 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21103 }
21104 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21105 Info.ptrVal = I.getArgOperand(0);
21106 Info.offset = 0;
21107 Info.align.reset();
21108 // volatile stores with NEON intrinsics not supported
21110 return true;
21111 }
21112 case Intrinsic::arm_mve_vld2q:
21113 case Intrinsic::arm_mve_vld4q: {
21115 // Conservatively set memVT to the entire set of vectors loaded.
21116 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21117 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21118 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21119 Info.ptrVal = I.getArgOperand(0);
21120 Info.offset = 0;
21121 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21122 // volatile loads with MVE intrinsics not supported
21124 return true;
21125 }
21126 case Intrinsic::arm_mve_vst2q:
21127 case Intrinsic::arm_mve_vst4q: {
21129 // Conservatively set memVT to the entire set of vectors stored.
21130 Type *VecTy = I.getArgOperand(1)->getType();
21131 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21132 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21133 Info.ptrVal = I.getArgOperand(0);
21134 Info.offset = 0;
21135 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21136 // volatile stores with MVE intrinsics not supported
21138 return true;
21139 }
21140 case Intrinsic::arm_mve_vldr_gather_base:
21141 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21143 Info.ptrVal = nullptr;
21144 Info.memVT = MVT::getVT(I.getType());
21145 Info.align = Align(1);
21147 return true;
21148 }
21149 case Intrinsic::arm_mve_vldr_gather_base_wb:
21150 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21152 Info.ptrVal = nullptr;
21153 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21154 Info.align = Align(1);
21156 return true;
21157 }
21158 case Intrinsic::arm_mve_vldr_gather_offset:
21159 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21161 Info.ptrVal = nullptr;
21162 MVT DataVT = MVT::getVT(I.getType());
21163 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21164 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21165 DataVT.getVectorNumElements());
21166 Info.align = Align(1);
21168 return true;
21169 }
21170 case Intrinsic::arm_mve_vstr_scatter_base:
21171 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21173 Info.ptrVal = nullptr;
21174 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21175 Info.align = Align(1);
21177 return true;
21178 }
21179 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21180 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21182 Info.ptrVal = nullptr;
21183 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21184 Info.align = Align(1);
21186 return true;
21187 }
21188 case Intrinsic::arm_mve_vstr_scatter_offset:
21189 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21191 Info.ptrVal = nullptr;
21192 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21193 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21194 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21195 DataVT.getVectorNumElements());
21196 Info.align = Align(1);
21198 return true;
21199 }
21200 case Intrinsic::arm_ldaex:
21201 case Intrinsic::arm_ldrex: {
21202 auto &DL = I.getDataLayout();
21203 Type *ValTy = I.getParamElementType(0);
21205 Info.memVT = MVT::getVT(ValTy);
21206 Info.ptrVal = I.getArgOperand(0);
21207 Info.offset = 0;
21208 Info.align = DL.getABITypeAlign(ValTy);
21210 return true;
21211 }
21212 case Intrinsic::arm_stlex:
21213 case Intrinsic::arm_strex: {
21214 auto &DL = I.getDataLayout();
21215 Type *ValTy = I.getParamElementType(1);
21217 Info.memVT = MVT::getVT(ValTy);
21218 Info.ptrVal = I.getArgOperand(1);
21219 Info.offset = 0;
21220 Info.align = DL.getABITypeAlign(ValTy);
21222 return true;
21223 }
21224 case Intrinsic::arm_stlexd:
21225 case Intrinsic::arm_strexd:
21227 Info.memVT = MVT::i64;
21228 Info.ptrVal = I.getArgOperand(2);
21229 Info.offset = 0;
21230 Info.align = Align(8);
21232 return true;
21233
21234 case Intrinsic::arm_ldaexd:
21235 case Intrinsic::arm_ldrexd:
21237 Info.memVT = MVT::i64;
21238 Info.ptrVal = I.getArgOperand(0);
21239 Info.offset = 0;
21240 Info.align = Align(8);
21242 return true;
21243
21244 default:
21245 break;
21246 }
21247
21248 return false;
21249}
21250
21251/// Returns true if it is beneficial to convert a load of a constant
21252/// to just the constant itself.
21253 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21254 Type *Ty) const {
21255 assert(Ty->isIntegerTy());
21256
21257 unsigned Bits = Ty->getPrimitiveSizeInBits();
21258 if (Bits == 0 || Bits > 32)
21259 return false;
21260 return true;
21261}
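// Sketch of the rationale (demoFitsInMovwMovt is hypothetical): any integer
// constant of 32 bits or less can be rematerialized with at most a movw/movt
// pair (or a single mov/mvn with a modified immediate), so reloading it from
// a constant pool is never cheaper.
static bool demoFitsInMovwMovt(const APInt &Imm) {
  unsigned Bits = Imm.getBitWidth();
  return Bits != 0 && Bits <= 32;
}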
21262
21263 bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
21264 unsigned Index) const {
21265 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
21266 return false;
21267
21268 return (Index == 0 || Index == ResVT.getVectorNumElements());
21269}
21270
21271 Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21272 ARM_MB::MemBOpt Domain) const {
21273 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21274
21275 // First, if the target has no DMB, see what fallback we can use.
21276 if (!Subtarget->hasDataBarrier()) {
21277 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21278 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21279 // here.
21280 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21281 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
21282 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21283 Builder.getInt32(0), Builder.getInt32(7),
21284 Builder.getInt32(10), Builder.getInt32(5)};
21285 return Builder.CreateCall(MCR, args);
21286 } else {
21287 // Instead of using barriers, atomic accesses on these subtargets use
21288 // libcalls.
21289 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21290 }
21291 } else {
21292 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
21293 // Only a full system barrier exists in the M-class architectures.
21294 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21295 Constant *CDomain = Builder.getInt32(Domain);
21296 return Builder.CreateCall(DMB, CDomain);
21297 }
21298}
21299
21300// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21301 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21302 Instruction *Inst,
21303 AtomicOrdering Ord) const {
21304 switch (Ord) {
21305 case AtomicOrdering::NotAtomic:
21306 case AtomicOrdering::Unordered:
21307 llvm_unreachable("Invalid fence: unordered/non-atomic");
21308 case AtomicOrdering::Monotonic:
21309 case AtomicOrdering::Acquire:
21310 return nullptr; // Nothing to do
21311 case AtomicOrdering::SequentiallyConsistent:
21312 if (!Inst->hasAtomicStore())
21313 return nullptr; // Nothing to do
21314 [[fallthrough]];
21315 case AtomicOrdering::Release:
21316 case AtomicOrdering::AcquireRelease:
21317 if (Subtarget->preferISHSTBarriers())
21318 return makeDMB(Builder, ARM_MB::ISHST);
21319 // FIXME: add a comment with a link to documentation justifying this.
21320 else
21321 return makeDMB(Builder, ARM_MB::ISH);
21322 }
21323 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21324}
21325
21326 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21327 Instruction *Inst,
21328 AtomicOrdering Ord) const {
21329 switch (Ord) {
21330 case AtomicOrdering::NotAtomic:
21331 case AtomicOrdering::Unordered:
21332 llvm_unreachable("Invalid fence: unordered/not-atomic");
21333 case AtomicOrdering::Monotonic:
21334 case AtomicOrdering::Release:
21335 return nullptr; // Nothing to do
21336 case AtomicOrdering::Acquire:
21337 case AtomicOrdering::AcquireRelease:
21338 case AtomicOrdering::SequentiallyConsistent:
21339 return makeDMB(Builder, ARM_MB::ISH);
21340 }
21341 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21342}
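// Summary sketch of the fence placement implemented above, assuming a
// subtarget that inserts fences for atomics: release (and stronger) stores
// get a leading "dmb ish" (or "dmb ishst"), acquire (and stronger) loads get
// a trailing "dmb ish", and seq_cst stores get both. A hypothetical helper
// mirroring the leading-fence decision:
static bool demoNeedsLeadingFence(AtomicOrdering Ord, bool IsStore) {
  return Ord == AtomicOrdering::Release ||
         Ord == AtomicOrdering::AcquireRelease ||
         (IsStore && Ord == AtomicOrdering::SequentiallyConsistent);
}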
21343
21344// Loads and stores less than 64-bits are already atomic; ones above that
21345// are doomed anyway, so defer to the default libcall and blame the OS when
21346// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21347// anything for those.
21348 TargetLoweringBase::AtomicExpansionKind
21349 ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
21350 bool has64BitAtomicStore;
21351 if (Subtarget->isMClass())
21352 has64BitAtomicStore = false;
21353 else if (Subtarget->isThumb())
21354 has64BitAtomicStore = Subtarget->hasV7Ops();
21355 else
21356 has64BitAtomicStore = Subtarget->hasV6Ops();
21357
21358 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21359 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21360 : AtomicExpansionKind::None;
21361}
21362
21363// Loads and stores less than 64-bits are already atomic; ones above that
21364// are doomed anyway, so defer to the default libcall and blame the OS when
21365// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21366// anything for those.
21367// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21368// guarantee, see DDI0406C ARM architecture reference manual,
21369// sections A8.8.72-74 LDRD)
21370 TargetLoweringBase::AtomicExpansionKind
21371 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
21372 bool has64BitAtomicLoad;
21373 if (Subtarget->isMClass())
21374 has64BitAtomicLoad = false;
21375 else if (Subtarget->isThumb())
21376 has64BitAtomicLoad = Subtarget->hasV7Ops();
21377 else
21378 has64BitAtomicLoad = Subtarget->hasV6Ops();
21379
21380 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21381 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21382 : AtomicExpansionKind::None;
21383}
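// Hedged sketch of the capability check shared by the load and store cases
// above (demoHas64BitExclusives is hypothetical): per the checks above,
// ldrexd/strexd are assumed available on ARM mode from v6, on Thumb from v7,
// and never on M-profile, so only then is a 64-bit atomic access expanded
// inline; otherwise it is left to the __atomic_* libcalls.
static bool demoHas64BitExclusives(bool IsMClass, bool IsThumb, bool HasV7,
                                   bool HasV6) {
  if (IsMClass)
    return false;
  return IsThumb ? HasV7 : HasV6;
}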
21384
21385// For the real atomic operations, we have ldrex/strex up to 32 bits,
21386// and up to 64 bits on the non-M profiles
21387 TargetLowering::AtomicExpansionKind
21388 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21389 if (AI->isFloatingPointOperation())
21390 return AtomicExpansionKind::CmpXChg;
21391
21392 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21393 bool hasAtomicRMW;
21394 if (Subtarget->isMClass())
21395 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21396 else if (Subtarget->isThumb())
21397 hasAtomicRMW = Subtarget->hasV7Ops();
21398 else
21399 hasAtomicRMW = Subtarget->hasV6Ops();
21400 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21401 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21402 // implement atomicrmw without spilling. If the target address is also on
21403 // the stack and close enough to the spill slot, this can lead to a
21404 // situation where the monitor always gets cleared and the atomic operation
21405 // can never succeed. So at -O0 lower this operation to a CAS loop.
21406 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21407 return AtomicExpansionKind::CmpXChg;
21408 return AtomicExpansionKind::LLSC;
21409 }
21410 return AtomicExpansionKind::None;
21411}
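// Shape of the decision above as a standalone sketch (demoRMWExpansionKind is
// hypothetical): with ldrex/strex available, atomicrmw becomes a load-linked /
// modify / store-conditional retry loop, except at -O0 where spills between
// ldrex and strex can clear the exclusive monitor, so a cmpxchg-based
// expansion is used instead; without ldrex/strex it stays a libcall.
static TargetLowering::AtomicExpansionKind
demoRMWExpansionKind(bool HasLLSC, bool AtO0) {
  if (!HasLLSC)
    return TargetLowering::AtomicExpansionKind::None; // libcall path
  return AtO0 ? TargetLowering::AtomicExpansionKind::CmpXChg
              : TargetLowering::AtomicExpansionKind::LLSC;
}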
21412
21413// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21414// bits, and up to 64 bits on the non-M profiles.
21415 TargetLowering::AtomicExpansionKind
21416 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
21417 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21418 // implement cmpxchg without spilling. If the address being exchanged is also
21419 // on the stack and close enough to the spill slot, this can lead to a
21420 // situation where the monitor always gets cleared and the atomic operation
21421 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21422 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21423 bool HasAtomicCmpXchg;
21424 if (Subtarget->isMClass())
21425 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21426 else if (Subtarget->isThumb())
21427 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21428 else
21429 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21430 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21431 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21432 return AtomicExpansionKind::LLSC;
21433 return AtomicExpansionKind::None;
21434}
21435
21436 bool ARMTargetLowering::shouldInsertFencesForAtomic(
21437 const Instruction *I) const {
21438 return InsertFencesForAtomic;
21439}
21440
21442 // ROPI/RWPI are not supported currently.
21443 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21444}
21445
21446 void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21447 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21448 return TargetLowering::insertSSPDeclarations(M);
21449
21450 // MSVC CRT has a global variable holding security cookie.
21451 M.getOrInsertGlobal("__security_cookie",
21452 PointerType::getUnqual(M.getContext()));
21453
21454 // MSVC CRT has a function to validate security cookie.
21455 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21456 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21457 PointerType::getUnqual(M.getContext()));
21458 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21459 F->addParamAttr(0, Attribute::AttrKind::InReg);
21460}
21461
21462 Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
21463 // MSVC CRT has a global variable holding security cookie.
21464 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21465 return M.getGlobalVariable("__security_cookie");
21466 return TargetLowering::getSDagStackGuard(M);
21467}
21468
21469 Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21470 // MSVC CRT has a function to validate security cookie.
21471 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21472 return M.getFunction("__security_check_cookie");
21473 return TargetLowering::getSSPStackGuardCheck(M);
21474}
21475
21476 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21477 unsigned &Cost) const {
21478 // If we do not have NEON, vector types are not natively supported.
21479 if (!Subtarget->hasNEON())
21480 return false;
21481
21482 // Floating point values and vector values map to the same register file.
21483 // Therefore, although we could do a store extract of a vector type, this is
21484 // better to leave at float as we have more freedom in the addressing mode for
21485 // those.
21486 if (VectorTy->isFPOrFPVectorTy())
21487 return false;
21488
21489 // If the index is unknown at compile time, this is very expensive to lower
21490 // and it is not possible to combine the store with the extract.
21491 if (!isa<ConstantInt>(Idx))
21492 return false;
21493
21494 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21495 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21496 // We can do a store + vector extract on any vector that fits perfectly in a D
21497 // or Q register.
21498 if (BitWidth == 64 || BitWidth == 128) {
21499 Cost = 0;
21500 return true;
21501 }
21502 return false;
21503}
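// Usage sketch: storing a single constant lane of a 64- or 128-bit NEON
// vector can be selected as a lane store (e.g. a VST1 of one D-register
// lane), so the combine above reports a cost of 0 for exactly those widths.
// A hypothetical helper mirroring the width check:
static bool demoFitsInNEONRegister(unsigned VectorBits) {
  return VectorBits == 64 || VectorBits == 128;
}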
21504
21505 bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21506 return Subtarget->hasV6T2Ops();
21507}
21508
21509 bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21510 return Subtarget->hasV6T2Ops();
21511}
21512
21513 bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
21514 const Instruction &AndI) const {
21515 if (!Subtarget->hasV7Ops())
21516 return false;
21517
21518 // Sink the `and` instruction only if the mask would fit into a modified
21519 // immediate operand.
21520 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21521 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21522 return false;
21523 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21524 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21525 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21526}
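// Modified-immediate sketch (demoIsARMModifiedImm is hypothetical): an
// ARM-mode operand encodes an 8-bit value rotated right by an even amount,
// so masks such as 0xff, 0xff00 or 0xf000000f are representable while
// 0x12345678 is not; Thumb-2 additionally allows patterns like 0x00ff00ff.
static bool demoIsARMModifiedImm(uint32_t Mask) {
  return ARM_AM::getSOImmVal(Mask) != -1;
}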
21527
21528 TargetLowering::ShiftLegalizationStrategy
21529 ARMTargetLowering::preferredShiftLegalizationStrategy(
21530 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21531 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21532 return ShiftLegalizationStrategy::LowerToLibcall;
21533 return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
21534 ExpansionFactor);
21535}
21536
21537 Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21538 Value *Addr,
21539 AtomicOrdering Ord) const {
21540 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21541 bool IsAcquire = isAcquireOrStronger(Ord);
21542
21543 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21544 // intrinsic must return {i32, i32} and we have to recombine them into a
21545 // single i64 here.
21546 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21547 Intrinsic::ID Int =
21548 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21549 Function *Ldrex = Intrinsic::getDeclaration(M, Int);
21550
21551 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21552
21553 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21554 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21555 if (!Subtarget->isLittle())
21556 std::swap (Lo, Hi);
21557 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21558 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21559 return Builder.CreateOr(
21560 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21561 }
21562
21563 Type *Tys[] = { Addr->getType() };
21564 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21565 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21566 CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21567
21568 CI->addParamAttr(
21569 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21570 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21571}
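// Minimal IRBuilder sketch of the i64 recombination performed above
// (demoCombineLoHi is hypothetical; Lo and Hi are the two i32 halves returned
// by ldrexd/ldaexd, already swapped on big-endian subtargets):
static Value *demoCombineLoHi(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                              Type *Int64Ty) {
  Value *Lo64 = Builder.CreateZExt(Lo, Int64Ty, "lo64");
  Value *Hi64 = Builder.CreateZExt(Hi, Int64Ty, "hi64");
  return Builder.CreateOr(
      Lo64, Builder.CreateShl(Hi64, ConstantInt::get(Int64Ty, 32)), "val64");
}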
21572
21573 void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21574 IRBuilderBase &Builder) const {
21575 if (!Subtarget->hasV7Ops())
21576 return;
21577 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21578 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21579}
21580
21581 Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21582 Value *Val, Value *Addr,
21583 AtomicOrdering Ord) const {
21584 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21585 bool IsRelease = isReleaseOrStronger(Ord);
21586
21587 // Since the intrinsics must have legal type, the i64 intrinsics take two
21588 // parameters: "i32, i32". We must marshal Val into the appropriate form
21589 // before the call.
21590 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21591 Intrinsic::ID Int =
21592 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21593 Function *Strex = Intrinsic::getDeclaration(M, Int);
21594 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21595
21596 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21597 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21598 if (!Subtarget->isLittle())
21599 std::swap(Lo, Hi);
21600 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21601 }
21602
21603 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21604 Type *Tys[] = { Addr->getType() };
21605 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21606
21607 CallInst *CI = Builder.CreateCall(
21608 Strex, {Builder.CreateZExtOrBitCast(
21609 Val, Strex->getFunctionType()->getParamType(0)),
21610 Addr});
21611 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21612 Val->getType()));
21613 return CI;
21614}
21615
21616
21617 bool ARMTargetLowering::alignLoopsWithOptSize() const {
21618 return Subtarget->isMClass();
21619}
21620
21621/// A helper function for determining the number of interleaved accesses we
21622/// will generate when lowering accesses of the given type.
21623 unsigned
21624 ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21625 const DataLayout &DL) const {
21626 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21627}
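// Worked example: a <16 x i32> group is 512 bits wide, so (512 + 127) / 128
// yields 4 separate vldN/vstN accesses, while a 64-bit <2 x i32> group still
// counts as a single access. Hypothetical standalone form of the same
// round-up:
static unsigned demoNumInterleavedAccesses(unsigned VectorSizeInBits) {
  return (VectorSizeInBits + 127) / 128;
}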
21628
21629 bool ARMTargetLowering::isLegalInterleavedAccessType(
21630 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21631 const DataLayout &DL) const {
21632
21633 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21634 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21635
21636 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21637 return false;
21638
21639 // Ensure the vector doesn't have f16 elements. Even though we could do an
21640 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21641 // f32.
21642 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21643 return false;
21644 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21645 return false;
21646
21647 // Ensure the number of vector elements is greater than 1.
21648 if (VecTy->getNumElements() < 2)
21649 return false;
21650
21651 // Ensure the element type is legal.
21652 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21653 return false;
21654 // And check that the alignment is high enough under MVE.
21655 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21656 return false;
21657
21658 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21659 // 128 will be split into multiple interleaved accesses.
21660 if (Subtarget->hasNEON() && VecSize == 64)
21661 return true;
21662 return VecSize % 128 == 0;
21663}
21664
21665 unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
21666 if (Subtarget->hasNEON())
21667 return 4;
21668 if (Subtarget->hasMVEIntegerOps())
21669 return MVEMaxSupportedInterleaveFactor;
21670 return TargetLoweringBase::getMaxSupportedInterleaveFactor();
21671}
21672
21673/// Lower an interleaved load into a vldN intrinsic.
21674///
21675/// E.g. Lower an interleaved load (Factor = 2):
21676/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21677/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21678/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21679///
21680/// Into:
21681/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21682/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21683/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21684 bool ARMTargetLowering::lowerInterleavedLoad(
21685 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21686 ArrayRef<unsigned> Indices, unsigned Factor) const {
21687 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21688 "Invalid interleave factor");
21689 assert(!Shuffles.empty() && "Empty shufflevector input");
21690 assert(Shuffles.size() == Indices.size() &&
21691 "Unmatched number of shufflevectors and indices");
21692
21693 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21694 Type *EltTy = VecTy->getElementType();
21695
21696 const DataLayout &DL = LI->getDataLayout();
21697 Align Alignment = LI->getAlign();
21698
21699 // Skip if we do not have NEON and skip illegal vector types. We can
21700 // "legalize" wide vector types into multiple interleaved accesses as long as
21701 // the vector types are divisible by 128.
21702 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21703 return false;
21704
21705 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21706
21707 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21708 // load integer vectors first and then convert to pointer vectors.
21709 if (EltTy->isPointerTy())
21710 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21711
21712 IRBuilder<> Builder(LI);
21713
21714 // The base address of the load.
21715 Value *BaseAddr = LI->getPointerOperand();
21716
21717 if (NumLoads > 1) {
21718 // If we're going to generate more than one load, reset the sub-vector type
21719 // to something legal.
21720 VecTy = FixedVectorType::get(VecTy->getElementType(),
21721 VecTy->getNumElements() / NumLoads);
21722 }
21723
21724 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21725
21726 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21727 if (Subtarget->hasNEON()) {
21728 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21729 Type *Tys[] = {VecTy, PtrTy};
21730 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21731 Intrinsic::arm_neon_vld3,
21732 Intrinsic::arm_neon_vld4};
21733 Function *VldnFunc =
21734 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21735
21737 Ops.push_back(BaseAddr);
21738 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21739
21740 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21741 } else {
21742 assert((Factor == 2 || Factor == 4) &&
21743 "expected interleave factor of 2 or 4 for MVE");
21744 Intrinsic::ID LoadInts =
21745 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21746 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21747 Type *Tys[] = {VecTy, PtrTy};
21748 Function *VldnFunc =
21749 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21750
21752 Ops.push_back(BaseAddr);
21753 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21754 }
21755 };
21756
21757 // Holds sub-vectors extracted from the load intrinsic return values. The
21758 // sub-vectors are associated with the shufflevector instructions they will
21759 // replace.
21761
21762 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21763 // If we're generating more than one load, compute the base address of
21764 // subsequent loads as an offset from the previous.
21765 if (LoadCount > 0)
21766 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21767 VecTy->getNumElements() * Factor);
21768
21769 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21770
21771 // Replace uses of each shufflevector with the corresponding vector loaded
21772 // by ldN.
21773 for (unsigned i = 0; i < Shuffles.size(); i++) {
21774 ShuffleVectorInst *SV = Shuffles[i];
21775 unsigned Index = Indices[i];
21776
21777 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21778
21779 // Convert the integer vector to pointer vector if the element is pointer.
21780 if (EltTy->isPointerTy())
21781 SubVec = Builder.CreateIntToPtr(
21782 SubVec,
21784
21785 SubVecs[SV].push_back(SubVec);
21786 }
21787 }
21788
21789 // Replace uses of the shufflevector instructions with the sub-vectors
21790 // returned by the load intrinsic. If a shufflevector instruction is
21791 // associated with more than one sub-vector, those sub-vectors will be
21792 // concatenated into a single wide vector.
21793 for (ShuffleVectorInst *SVI : Shuffles) {
21794 auto &SubVec = SubVecs[SVI];
21795 auto *WideVec =
21796 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21797 SVI->replaceAllUsesWith(WideVec);
21798 }
21799
21800 return true;
21801}
21802
21803/// Lower an interleaved store into a vstN intrinsic.
21804///
21805/// E.g. Lower an interleaved store (Factor = 3):
21806/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21807/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21808/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21809///
21810/// Into:
21811/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21812/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21813/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21814/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21815///
21816/// Note that the new shufflevectors will be removed and we'll only generate one
21817/// vst3 instruction in CodeGen.
21818///
21819/// Example for a more general valid mask (Factor 3). Lower:
21820/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21821/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21822/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21823///
21824/// Into:
21825/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21826/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21827/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21828/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21829 bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
21830 ShuffleVectorInst *SVI,
21831 unsigned Factor) const {
21832 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21833 "Invalid interleave factor");
21834
21835 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21836 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21837
21838 unsigned LaneLen = VecTy->getNumElements() / Factor;
21839 Type *EltTy = VecTy->getElementType();
21840 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21841
21842 const DataLayout &DL = SI->getDataLayout();
21843 Align Alignment = SI->getAlign();
21844
21845 // Skip if we do not have NEON and skip illegal vector types. We can
21846 // "legalize" wide vector types into multiple interleaved accesses as long as
21847 // the vector types are divisible by 128.
21848 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21849 return false;
21850
21851 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21852
21853 Value *Op0 = SVI->getOperand(0);
21854 Value *Op1 = SVI->getOperand(1);
21855 IRBuilder<> Builder(SI);
21856
21857 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21858 // vectors to integer vectors.
21859 if (EltTy->isPointerTy()) {
21860 Type *IntTy = DL.getIntPtrType(EltTy);
21861
21862 // Convert to the corresponding integer vector.
21863 auto *IntVecTy =
21864 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21865 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21866 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21867
21868 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21869 }
21870
21871 // The base address of the store.
21872 Value *BaseAddr = SI->getPointerOperand();
21873
21874 if (NumStores > 1) {
21875 // If we're going to generate more than one store, reset the lane length
21876 // and sub-vector type to something legal.
21877 LaneLen /= NumStores;
21878 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21879 }
21880
21881 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21882
21883 auto Mask = SVI->getShuffleMask();
21884
21885 auto createStoreIntrinsic = [&](Value *BaseAddr,
21886 SmallVectorImpl<Value *> &Shuffles) {
21887 if (Subtarget->hasNEON()) {
21888 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21889 Intrinsic::arm_neon_vst3,
21890 Intrinsic::arm_neon_vst4};
21891 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21892 Type *Tys[] = {PtrTy, SubVecTy};
21893
21895 SI->getModule(), StoreInts[Factor - 2], Tys);
21896
21898 Ops.push_back(BaseAddr);
21899 append_range(Ops, Shuffles);
21900 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21901 Builder.CreateCall(VstNFunc, Ops);
21902 } else {
21903 assert((Factor == 2 || Factor == 4) &&
21904 "expected interleave factor of 2 or 4 for MVE");
21905 Intrinsic::ID StoreInts =
21906 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21907 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21908 Type *Tys[] = {PtrTy, SubVecTy};
21909 Function *VstNFunc =
21910 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21911
21913 Ops.push_back(BaseAddr);
21914 append_range(Ops, Shuffles);
21915 for (unsigned F = 0; F < Factor; F++) {
21916 Ops.push_back(Builder.getInt32(F));
21917 Builder.CreateCall(VstNFunc, Ops);
21918 Ops.pop_back();
21919 }
21920 }
21921 };
21922
21923 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21924 // If we're generating more than one store, compute the base address of
21925 // subsequent stores as an offset from the previous.
21926 if (StoreCount > 0)
21927 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21928 BaseAddr, LaneLen * Factor);
21929
21930 SmallVector<Value *, 4> Shuffles;
21931
21932 // Split the shufflevector operands into sub vectors for the new vstN call.
21933 for (unsigned i = 0; i < Factor; i++) {
21934 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21935 if (Mask[IdxI] >= 0) {
21936 Shuffles.push_back(Builder.CreateShuffleVector(
21937 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21938 } else {
21939 unsigned StartMask = 0;
21940 for (unsigned j = 1; j < LaneLen; j++) {
21941 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21942 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21943 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21944 break;
21945 }
21946 }
21947 // Note: If all elements in a chunk are undefs, StartMask=0!
21948 // Note: Filling undef gaps with random elements is ok, since
21949 // those elements were being written anyway (with undefs).
21950 // In the case of all undefs we're defaulting to using elems from 0
21951 // Note: StartMask cannot be negative, it's checked in
21952 // isReInterleaveMask
21953 Shuffles.push_back(Builder.CreateShuffleVector(
21954 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21955 }
21956 }
21957
21958 createStoreIntrinsic(BaseAddr, Shuffles);
21959 }
21960 return true;
21961}
21962
21970
21971 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21972 uint64_t &Members) {
21973 if (auto *ST = dyn_cast<StructType>(Ty)) {
21974 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21975 uint64_t SubMembers = 0;
21976 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21977 return false;
21978 Members += SubMembers;
21979 }
21980 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21981 uint64_t SubMembers = 0;
21982 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21983 return false;
21984 Members += SubMembers * AT->getNumElements();
21985 } else if (Ty->isFloatTy()) {
21986 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21987 return false;
21988 Members = 1;
21989 Base = HA_FLOAT;
21990 } else if (Ty->isDoubleTy()) {
21991 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21992 return false;
21993 Members = 1;
21994 Base = HA_DOUBLE;
21995 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21996 Members = 1;
21997 switch (Base) {
21998 case HA_FLOAT:
21999 case HA_DOUBLE:
22000 return false;
22001 case HA_VECT64:
22002 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
22003 case HA_VECT128:
22004 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
22005 case HA_UNKNOWN:
22006 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
22007 case 64:
22008 Base = HA_VECT64;
22009 return true;
22010 case 128:
22011 Base = HA_VECT128;
22012 return true;
22013 default:
22014 return false;
22015 }
22016 }
22017 }
22018
22019 return (Members > 0 && Members <= 4);
22020}
22021
22022/// Return the correct alignment for the current calling convention.
22023 Align ARMTargetLowering::getABIAlignmentForCallingConv(
22024 Type *ArgTy, const DataLayout &DL) const {
22025 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
22026 if (!ArgTy->isVectorTy())
22027 return ABITypeAlign;
22028
22029 // Avoid over-aligning vector parameters. It would require realigning the
22030 // stack and waste space for no real benefit.
22031 return std::min(ABITypeAlign, DL.getStackAlignment());
22032}
22033
22034/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
22035/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
22036/// passing according to AAPCS rules.
22037 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
22038 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
22039 const DataLayout &DL) const {
22040 if (getEffectiveCallingConv(CallConv, isVarArg) !=
22041 CallingConv::ARM_AAPCS_VFP)
22042 return false;
22043
22044 HABaseType Base = HA_UNKNOWN;
22045 uint64_t Members = 0;
22046 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
22047 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
22048
22049 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
22050 return IsHA || IsIntArray;
22051}
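// Example of what qualifies (sketch): under AAPCS-VFP a C struct such as
// { float x, y, z, w; } is a homogeneous aggregate with Base == HA_FLOAT and
// Members == 4, so it must be assigned to s0-s3 (or spilled) as a single
// block and never split between registers and the stack. A hypothetical check
// built on the helper above:
static bool demoIsFloatHA(Type *Ty) {
  HABaseType Base = HA_UNKNOWN;
  uint64_t Members = 0;
  return isHomogeneousAggregate(Ty, Base, Members) && Base == HA_FLOAT;
}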
22052
22053 Register ARMTargetLowering::getExceptionPointerRegister(
22054 const Constant *PersonalityFn) const {
22055 // Platforms which do not use SjLj EH may return values in these registers
22056 // via the personality function.
22057 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
22058}
22059
22060 Register ARMTargetLowering::getExceptionSelectorRegister(
22061 const Constant *PersonalityFn) const {
22062 // Platforms which do not use SjLj EH may return values in these registers
22063 // via the personality function.
22064 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
22065}
22066
22067void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
22068 // Update IsSplitCSR in ARMFunctionInfo.
22069 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
22070 AFI->setIsSplitCSR(true);
22071}
22072
22073void ARMTargetLowering::insertCopiesSplitCSR(
22074 MachineBasicBlock *Entry,
22075 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
22076 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
22077 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
22078 if (!IStart)
22079 return;
22080
22081 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
22082 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
22083 MachineBasicBlock::iterator MBBI = Entry->begin();
22084 for (const MCPhysReg *I = IStart; *I; ++I) {
22085 const TargetRegisterClass *RC = nullptr;
22086 if (ARM::GPRRegClass.contains(*I))
22087 RC = &ARM::GPRRegClass;
22088 else if (ARM::DPRRegClass.contains(*I))
22089 RC = &ARM::DPRRegClass;
22090 else
22091 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
22092
22093 Register NewVR = MRI->createVirtualRegister(RC);
22094 // Create copy from CSR to a virtual register.
22095 // FIXME: this currently does not emit CFI pseudo-instructions, it works
22096 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22097 // nounwind. If we want to generalize this later, we may need to emit
22098 // CFI pseudo-instructions.
22099 assert(Entry->getParent()->getFunction().hasFnAttribute(
22100 Attribute::NoUnwind) &&
22101 "Function should be nounwind in insertCopiesSplitCSR!");
22102 Entry->addLiveIn(*I);
22103 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22104 .addReg(*I);
22105
22106 // Insert the copy-back instructions right before the terminator.
22107 for (auto *Exit : Exits)
22108 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22109 TII->get(TargetOpcode::COPY), *I)
22110 .addReg(NewVR);
22111 }
22112}
22113
22117}
22118
22119 bool ARMTargetLowering::isComplexDeinterleavingSupported() const {
22120 return Subtarget->hasMVEIntegerOps();
22121}
22122
22123 bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
22124 ComplexDeinterleavingOperation Operation, Type *Ty) const {
22125 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22126 if (!VTy)
22127 return false;
22128
22129 auto *ScalarTy = VTy->getScalarType();
22130 unsigned NumElements = VTy->getNumElements();
22131
22132 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22133 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22134 return false;
22135
22136 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22137 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22138 return Subtarget->hasMVEFloatOps();
22139
22140 if (Operation != ComplexDeinterleavingOperation::CAdd)
22141 return false;
22142
22143 return Subtarget->hasMVEIntegerOps() &&
22144 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22145 ScalarTy->isIntegerTy(32));
22146}
22147
22148 Value *ARMTargetLowering::createComplexDeinterleavingIR(
22149 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
22150 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22151 Value *Accumulator) const {
22152
22153 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22154
22155 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22156
22157 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22158
22159 if (TyWidth > 128) {
22160 int Stride = Ty->getNumElements() / 2;
22161 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22162 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22163 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22164 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22165
22166 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22167 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22168 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22169 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22170 Value *LowerSplitAcc = nullptr;
22171 Value *UpperSplitAcc = nullptr;
22172
22173 if (Accumulator) {
22174 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22175 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22176 }
22177
22178 auto *LowerSplitInt = createComplexDeinterleavingIR(
22179 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22180 auto *UpperSplitInt = createComplexDeinterleavingIR(
22181 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22182
22183 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22184 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22185 }
22186
22187 auto *IntTy = Type::getInt32Ty(B.getContext());
22188
22189 ConstantInt *ConstRotation = nullptr;
22190 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22191 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22192
22193 if (Accumulator)
22194 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22195 {ConstRotation, Accumulator, InputB, InputA});
22196 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22197 {ConstRotation, InputB, InputA});
22198 }
22199
22200 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22201 // 1 means the value is not halved.
22202 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22203
22204 if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
22205 ConstRotation = ConstantInt::get(IntTy, 0);
22206 else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22207 ConstRotation = ConstantInt::get(IntTy, 1);
22208
22209 if (!ConstRotation)
22210 return nullptr; // Invalid rotation for arm_mve_vcaddq
22211
22212 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22213 {ConstHalving, ConstRotation, InputA, InputB});
22214 }
22215
22216 return nullptr;
22217}
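// Usage sketch (demoCreateVCMUL90 is hypothetical): a partial complex
// multiply of two <8 x half> inputs with a 90-degree rotation maps directly
// to the MVE VCMUL intrinsic with rotation operand 1, mirroring the
// CMulPartial path above; wider inputs are first split in half, lowered
// recursively, and concatenated back together.
static Value *demoCreateVCMUL90(IRBuilderBase &B, Value *InputA,
                                Value *InputB) {
  auto *IntTy = Type::getInt32Ty(B.getContext());
  return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, InputA->getType(),
                           {ConstantInt::get(IntTy, 1), InputB, InputA});
}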
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
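The PerformXXXCombine and LowerXXX helpers listed above all share one shape: inspect the node, return an empty SDValue when the pattern does not apply, and otherwise return a replacement value. The following stand-alone skeleton is illustrative only (PerformExampleCombine and the trivial x+0 pattern are hypothetical, not taken from this file):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Hypothetical combine helper showing the common "match, then rewrite or
// bail out" structure used by the helpers in this file.
static SDValue PerformExampleCombine(SDNode *N, SelectionDAG &DAG) {
  // Only consider i32 ADD nodes whose second operand is the constant 0.
  if (N->getOpcode() != ISD::ADD || N->getValueType(0) != MVT::i32)
    return SDValue();
  if (!isNullConstant(N->getOperand(1)))
    return SDValue();
  // x + 0 --> x.  The generic combiner already handles this; it is shown
  // purely to illustrate the shape of a target-specific combine.
  return N->getOperand(0);
}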
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Module.h: This file contains the declarations for the Module class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1399
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1241
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1629
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1181
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1091
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned logBase2() const
Definition: APInt.h:1719
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:455
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1615
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
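Since so many of the combines above manipulate APInt values, here is a small self-contained illustration of the operations listed in this block (the values are chosen arbitrarily):

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt X(32, 0xF0);                        // 32-bit value 0b1111'0000
  assert(X.popcount() == 4);                // four bits set
  assert(X.countr_zero() == 4);             // four trailing zero bits
  assert(!X.isPowerOf2());
  assert(APInt::getAllOnes(8).isAllOnes()); // 0xFF, width 8
  APInt Wide = X.zextOrTrunc(64);           // zero-extend to 64 bits
  assert(Wide.getBitWidth() == 64 && Wide.getZExtValue() == 0xF0);
  assert(APInt::getHighBitsSet(16, 4).getZExtValue() == 0xF000);
  return 0;
}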
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:312
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:321
bool hasARMOps() const
Definition: ARMSubtarget.h:265
bool supportsTailCall() const
Definition: ARMSubtarget.h:399
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:298
bool hasVFP4Base() const
Definition: ARMSubtarget.h:273
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
bool useFPVFMx() const
Definition: ARMSubtarget.h:282
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:274
bool isThumb2() const
Definition: ARMSubtarget.h:365
bool isTargetWindows() const
Definition: ARMSubtarget.h:308
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:288
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:200
bool useSjLjEH() const
Definition: ARMSubtarget.h:287
bool isTargetDarwin() const
Definition: ARMSubtarget.h:300
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:208
bool hasVFP2Base() const
Definition: ARMSubtarget.h:271
bool isTargetAndroid() const
Definition: ARMSubtarget.h:350
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:310
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:326
bool hasVFP3Base() const
Definition: ARMSubtarget.h:272
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:286
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:302
bool hasMinSize() const
Definition: ARMSubtarget.h:363
bool isTargetIOS() const
Definition: ARMSubtarget.h:301
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:267
const InstrItineraryData * getInstrItineraryData() const override
getInstrItineraryData - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:433
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:303
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:276
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:304
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:407
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:401
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:331
bool isTargetLinux() const
Definition: ARMSubtarget.h:305
bool useFPVFMx16() const
Definition: ARMSubtarget.h:285
bool isMClass() const
Definition: ARMSubtarget.h:366
unsigned getPrefLoopLogAlignment() const
Definition: ARMSubtarget.h:486
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:280
bool isTargetELF() const
Definition: ARMSubtarget.h:311
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:443
bool isReadOnly(const GlobalValue *GV) const
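Lowering decisions throughout the file key off these subtarget predicates. A minimal sketch of that style, usable only inside the ARM backend because ARMSubtarget.h is a target-private header (the helper name and the particular policy are hypothetical):

#include "ARMSubtarget.h"

// Hypothetical helper: combine a few of the feature predicates listed above
// into a single policy decision.
static bool preferNEONForFPSketch(const llvm::ARMSubtarget &ST) {
  return !ST.isThumb1Only() && ST.hasVFP2Base() &&
         ST.useNEONForSinglePrecisionFP();
}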
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode represented by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: 'sub y, (xor x, -1)' and 'add (add x, 1), y'. The variant with two add's is IR...
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
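Most of the hooks above are reached from LowerOperation, which is conventionally a switch over the opcodes the target marked Custom. A schematic sketch, not the real ARM dispatch table (MyTargetLowering and the single CTTZ case are placeholders):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Schematic only: how a TargetLowering subclass typically dispatches custom
// lowering requests to per-opcode helpers.
class MyTargetLowering : public TargetLowering {
public:
  using TargetLowering::TargetLowering;

  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override {
    switch (Op.getOpcode()) {
    case ISD::CTTZ:
      // A real target would build replacement nodes here with DAG.getNode,
      // as the LowerCTTZ helper listed earlier does for ARM.
      return SDValue();
    default:
      llvm_unreachable("unexpected opcode marked Custom");
    }
  }
};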
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
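ArrayRef is a non-owning view over contiguous storage; a two-line illustration:

#include "llvm/ADT/ArrayRef.h"
#include <cassert>
#include <vector>
using namespace llvm;

int main() {
  std::vector<int> V = {3, 1, 2};
  ArrayRef<int> A(V);                        // view over V's storage, no copy
  assert(!A.empty() && A.size() == 3 && A[1] == 1);
  return 0;
}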
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
bool isFloatingPointOperation() const
Definition: Instructions.h:864
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:94
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
The address of a basic block.
Definition: Constants.h:890
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
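Argument lowering walks the assignments that CCState produces into a list of CCValAssign records. A condensed, hypothetical sketch of that loop (countRegArgsSketch and the CC_Example parameter are illustrative; a real caller would pass the function returned by CCAssignFnForCall):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

// Hypothetical sketch: classify incoming formal arguments and count how many
// are assigned to registers rather than stack slots.
static unsigned countRegArgsSketch(CallingConv::ID CC, bool IsVarArg,
                                   MachineFunction &MF, LLVMContext &Ctx,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   CCAssignFn *CC_Example) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, Ctx);
  CCInfo.AnalyzeFormalArguments(Ins, CC_Example);

  unsigned NumRegArgs = 0;
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc())
      ++NumRegArgs;          // the value lives in VA.getLocReg()
    // else: VA.isMemLoc(), stack slot at VA.getLocMemOffset()
  }
  return NumRegArgs;
}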
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1542
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1594
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:706
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:196
bool isBigEndian() const
Definition: DataLayout.h:197
Align getStackAlignment() const
Definition: DataLayout.h:229
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:462
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:290
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:865
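A small standalone example of the DataLayout queries above, using an illustrative 32-bit little-endian layout string:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  DataLayout DL("e-p:32:32-i64:64-S64");   // illustrative layout string
  Type *I64 = Type::getInt64Ty(Ctx);

  outs() << "little-endian:  " << (DL.isLittleEndian() ? "yes" : "no") << "\n";
  outs() << "alloc size i64: " << DL.getTypeAllocSize(I64).getFixedValue() << "\n"; // 8
  outs() << "pref align i64: " << DL.getPrefTypeAlign(I64).value() << "\n";         // 8
  outs() << "stack align:    " << DL.getStackAlignment().value() << "\n";           // 8
  return 0;
}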
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
unsigned getNumElements() const
Definition: DerivedTypes.h:582
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
arg_iterator arg_begin()
Definition: Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:686
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:232
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2157
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1896
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2536
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2142
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1454
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1433
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2041
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2514
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2137
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1514
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2173
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
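To make the IRBuilder entries above concrete, a minimal stand-alone function-building example (the module and function names are arbitrary):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);

  // Build: i32 @f(i32 %x) { ret i32 ((%x << 3) | 1) }
  FunctionType *FT =
      FunctionType::get(Type::getInt32Ty(Ctx), {Type::getInt32Ty(Ctx)}, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);

  IRBuilder<> B(BB);
  Value *Shl = B.CreateShl(F->getArg(0), B.getInt32(3), "shl");
  Value *Or = B.CreateOr(Shl, B.getInt32(1), "or");
  B.CreateRet(Or);

  verifyFunction(*F, &errs());   // report problems, if any, to stderr
  M.print(outs(), nullptr);
  return 0;
}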
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
Value * getPointerOperand()
Definition: Instructions.h:253
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:231
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
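A quick illustration of the MVT queries listed above:

#include "llvm/CodeGenTypes/MachineValueType.h"
#include <cassert>
using namespace llvm;

int main() {
  MVT V4i32 = MVT::getVectorVT(MVT::i32, 4);   // <4 x i32>
  assert(V4i32.isInteger() && !V4i32.isFloatingPoint());
  assert(V4i32.getVectorNumElements() == 4);
  assert(V4i32.getVectorElementType() == MVT::i32);
  assert(V4i32.getFixedSizeInBits() == 128);
  assert(MVT::getIntegerVT(16) == MVT::i16);
  return 0;
}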
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
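A sketch of how the MachineFrameInfo hooks above pair with SelectionDAG when materializing an incoming stack argument; MF, DAG, dl, Chain and ArgOffset are assumed to be provided by the surrounding lowering code and are not part of this listing.

  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Pin a 4-byte slot at a fixed offset from the incoming stack pointer.
  int FI = MFI.CreateFixedObject(/*Size=*/4, /*SPOffset=*/ArgOffset,
                                 /*IsImmutable=*/true);
  SDValue FIN  = DAG.getFrameIndex(FI, MVT::i32);
  SDValue Load = DAG.getLoad(MVT::i32, dl, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));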
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
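A sketch of the BuildMI/addReg/addImm/addMBB pattern these builders support, as used by custom-inserter style code; the opcodes and registers are placeholders, and real ARM instructions would additionally carry predicate operands.

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
  using namespace llvm;

  static void emitCopyThenBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 const DebugLoc &DL, const TargetInstrInfo *TII,
                                 unsigned CopyOpc, unsigned BranchOpc,
                                 Register Dst, Register Src,
                                 MachineBasicBlock *Target) {
    // Dst = CopyOpc Src, #0 -- operands are appended in the order they are added.
    BuildMI(MBB, I, DL, TII->get(CopyOpc), Dst).addReg(Src).addImm(0);
    // Unconditional branch to Target.
    BuildMI(MBB, I, DL, TII->get(BranchOpc)).addMBB(Target);
  }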
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of a block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns the original alignment of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if this node is an UNDEF node.
void setFlags(SDNodeFlags NewFlags)
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:489
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:500
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
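A sketch of the node-building helpers above inside a hypothetical lowering routine; DAG, dl and Op come from the surrounding function, and i1 is used as the setcc result type only for brevity (a real target would consult getSetCCResultType).

  EVT VT = Op.getValueType();
  SDValue A = Op.getOperand(0), B = Op.getOperand(1);
  SDValue And  = DAG.getNode(ISD::AND, dl, VT, A, B);   // A & B
  SDValue Nand = DAG.getNOT(dl, And, VT);               // ~(A & B), i.e. XOR with -1
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Cond = DAG.getSetCC(dl, MVT::i1, Nand, Zero, ISD::SETNE);
  return DAG.getSelect(dl, VT, Cond, A, B);             // Cond ? A : B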
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
void reserve(size_type NumEntries)
Definition: SmallPtrSet.h:114
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
bool empty() const
Definition: SmallSet.h:159
bool erase(const T &V)
Definition: SmallSet.h:207
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:819
void resize(size_type N)
Definition: SmallVector.h:652
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const unsigned char * bytes_end() const
Definition: StringRef.h:118
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
const unsigned char * bytes_begin() const
Definition: StringRef.h:115
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
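A self-contained sketch of the StringSwitch pattern, as typically used when classifying inline-asm constraint strings; the constraint names and category codes here are illustrative only.

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"
  using namespace llvm;

  // Map a constraint letter to an arbitrary category code.
  static int classifyConstraint(StringRef C) {
    return StringSwitch<int>(C)
        .Case("r", 0)   // general-purpose register
        .Case("w", 1)   // FP/vector register
        .Default(-1);   // unknown constraint
  }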
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:361
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
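A schematic fragment, not copied from this file, of how a TargetLowering constructor exercises the configuration hooks above; the particular actions, types and Subtarget accessor are examples only.

  // Inside the target's TargetLowering constructor:
  addRegisterClass(MVT::i32, &ARM::GPRRegClass);        // i32 values live in GPRs
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);     // no popcount instruction
  setOperationAction(ISD::SDIV,  MVT::i32, LibCall);    // divide via runtime call
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setSchedulingPreference(Sched::Hybrid);
  computeRegisterProperties(Subtarget->getRegisterInfo());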
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:399
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:500
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:634
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:248
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
Type * getArrayElementType() const
Definition: Type.h:399
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:212
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
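A sketch, equivalent in spirit to isFPOrFPVectorTy() above but spelled out with the individual queries, for classifying a scalar-or-vector floating-point type.

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/Support/Casting.h"
  using namespace llvm;

  // True when Ty is a scalar FP type or a vector of FP elements.
  static bool isFPOrFPVector(Type *Ty) {
    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;
    if (auto *VTy = dyn_cast<VectorType>(Ty))
      return VTy->getElementType()->isFloatingPointTy();
    return false;
  }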
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Section Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
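A sketch of how getSOImmVal/getT2SOImmVal above are typically queried to decide whether a 32-bit constant is a cheap modified immediate; the include path is the backend-local MCTargetDesc location.

  #include "MCTargetDesc/ARMAddressingModes.h"
  using namespace llvm;

  // True when Imm fits an ARM (or Thumb-2) shifter-operand immediate encoding.
  static bool isCheapImmediate(unsigned Imm, bool IsThumb2) {
    int Enc = IsThumb2 ? ARM_AM::getT2SOImmVal(Imm) : ARM_AM::getSOImmVal(Imm);
    return Enc != -1;   // -1 means "not encodable"
  }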
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:826
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Flow Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1194
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1190
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1066
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1355
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1440
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:511
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1337
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1223
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1339
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1309
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1340
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1070
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1089
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:157
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:840
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1425
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1301
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1093
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1439
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:963
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1335
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1336
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:996
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1480
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:935
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1115
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1422
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1289
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1426
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1056
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:787
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:980
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1145
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1338
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1124
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1305
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1441
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1219
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1434
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1084
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1061
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1333
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1279
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:906
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1316
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1341
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1028
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1109
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:938
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1442
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1331
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1047
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1332
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1250
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1423
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1330
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1001
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:919
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:112
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:905
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1214
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1138
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1636
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Returns true if N is a constant splat vector, storing the splatted constant value in SplatValue.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1552
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1554
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
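Taken together, the ISD predicates and CondCode helpers above are what the DAG combines in this file use to classify memory nodes and rewrite comparisons. A minimal sketch (N is a hypothetical SDNode* being inspected):
// Rewriting an integer comparison:
ISD::CondCode CC = ISD::SETLT;
ISD::CondCode InvCC = ISD::getSetCCInverse(CC, MVT::i32);   // SETGE, i.e. !(X < Y)
ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);    // SETGT, i.e. (Y > X)
bool IsSigned = ISD::isSignedIntSetCC(CC);                  // true
// Classifying a memory node before folding it:
if (ISD::isSEXTLoad(N) || ISD::isZEXTLoad(N)) {
  // N is an extending load.
} else if (ISD::isNormalLoad(N)) {
  // N is a plain unindexed, non-extending load.
}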
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
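Intrinsic::getDeclaration is how the IR-level hooks in this file obtain intrinsic callees (the real code requests llvm.arm.* / llvm.arm.mve.* intrinsics). A generic sketch, assuming M is the current Module*, Builder an IRBuilder<>, and X an i32 Value*:
// Declare (or find) llvm.ctlz.i32 in the module and call it.
Function *Ctlz = Intrinsic::getDeclaration(M, Intrinsic::ctlz,
                                           {Builder.getInt32Ty()});
// Second operand of llvm.ctlz is the "is_zero_poison" i1 flag.
Value *LeadingZeros = Builder.CreateCall(Ctlz, {X, Builder.getFalse()});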
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
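The PatternMatch helpers above operate on IR, e.g. when deciding whether a shuffle/insertelement sequence forms a splat worth lowering specially. A small sketch, with V a hypothetical Value* and the mask deliberately ignored by the two-operand m_Shuffle:
using namespace llvm::PatternMatch;
Value *X = nullptr, *Y = nullptr;
// Splat-like pattern: shuffle(insertelement(undef, X, 0), undef), any mask.
if (match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(X), m_ZeroInt()),
                       m_Undef()))) {
  // X is the scalar being splatted.
}
// A negated or sign/zero-extended operand.
if (match(V, m_FNeg(m_Value(Y))) || match(V, m_ZExtOrSExt(m_Value(Y)))) {
  // Y is the inner operand.
}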
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
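The RTLIB getters above select a runtime-library routine when no hardware instruction exists for a conversion; UNKNOWN_LIBCALL means the combination is unsupported. A sketch with illustrative types:
// Signed f64 -> i32 conversion via a libcall:
RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);   // FPTOSINT_F64_I32
assert(LC != RTLIB::UNKNOWN_LIBCALL && "unsupported conversion");
// Unsigned i64 -> f32 conversion:
RTLIB::Libcall LC2 = RTLIB::getUINTTOFP(MVT::i64, MVT::f32);  // UINTTOFP_I64_F32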
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:255
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2413
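The STLExtras helpers above (all_of, any_of, find, enumerate, drop_begin, ...) are range wrappers used throughout the shuffle-mask analysis in this file. A minimal sketch over a hypothetical mask:
SmallVector<int, 8> Mask = {0, 1, 2, 3, -1, -1, 6, 7};
// True only if no element is undef (-1).
bool AllDefined = all_of(Mask, [](int M) { return M >= 0; });
// Walk elements together with their indices.
for (const auto &E : enumerate(Mask))
  if (E.value() != -1 && E.value() != (int)E.index())
    ; // this element was moved by the shuffle
// Iterate everything except the first element.
for (int M : drop_begin(Mask))
  (void)M;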
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:267
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2080
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1541
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
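predOps, condCodeOp, and t1CondCodeOp supply the predicate and optional flag-def operands that most ARM MachineInstrs expect, and BuildMI chains them together. A sketch of the usual idiom, assuming MBB, MI, dl, TII, DestReg, and SrcReg are in scope:
// Unpredicated register move that does not set the flags:
BuildMI(MBB, MI, dl, TII->get(ARM::MOVr), DestReg)
    .addReg(SrcReg)
    .add(predOps(ARMCC::AL))  // predicate = AL, predicate register = noreg
    .add(condCodeOp());       // cc_out = noreg (S bit not set)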
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:279
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int countr_zero(T Val)
Count the number of 0s from the least significant bit towards the most significant bit, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
int countl_zero(T Val)
Count the number of 0s from the most significant bit towards the least significant bit, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
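The bit utilities above back most of the immediate-encoding and mask checks in this file. A few worked values (a sketch, not tied to a particular caller):
isPowerOf2_32(0x40u);       // true: exactly one bit set
isMask_32(0x00FFu);         // true: contiguous ones starting at bit 0
isShiftedMask_32(0x0FF0u);  // true: contiguous ones anywhere
Log2_32(64u);               // 6
countr_zero(0x50u);         // 4  trailing zero bits
countl_zero(0x50u);         // 25 leading zero bits (32-bit value)
countr_one(0x0Fu);          // 4  trailing one bits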
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1313
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ MVEVMVNModImm
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
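All of the CC_ARM_* / RetCC_ARM_* functions above have the CCAssignFn shape and are driven through a CCState, as in LowerCall/LowerFormalArguments. A condensed sketch, assuming CallConv, isVarArg, MF, Context, and Outs are already set up:
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, Context);
CCInfo.AnalyzeCallOperands(Outs, CC_ARM_AAPCS);  // runs the assign fn per argument
for (const CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc())
    ; // argument assigned to register VA.getLocReg()
  else
    ; // argument assigned to the stack at VA.getLocMemOffset()
}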
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1928
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
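createSequentialMask and concatenateVectors are the IR-level helpers used by the interleaved-access lowering. A short sketch, assuming Builder is an IRBuilderBase and V0/V1 are vector values of the same type:
// Shuffle mask {2, 3, 4, 5}: four consecutive indices starting at 2.
SmallVector<int, 16> Mask =
    createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/0);
// One vector twice as wide, holding V0's elements followed by V1's.
Value *Wide = concatenateVectors(Builder, {V0, V1});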
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:258
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
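Align, alignTo, and commonAlignment carry and combine alignment facts when loads/stores are split or widened. A few worked values (a sketch):
Align A(8);
alignTo(10, A);          // 16: next multiple of 8 holding 10 bytes
commonAlignment(A, 4);   // Align(4): 8-aligned base plus offset 4
commonAlignment(A, 16);  // Align(8): offset 16 keeps 8-byte alignment
A.value();               // 8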
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:381
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:275
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:291
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:341
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:449
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:350
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
bool isPow2VectorType() const
Returns true if the vector has a power-of-2 number of elements.
Definition: ValueTypes.h:456
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:275
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:204
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isFixedLengthVector() const
Definition: ValueTypes.h:178
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:314
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:299
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:439
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:199
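The EVT queries above drive most of the type-legality decisions in this file. A brief sketch, assuming Ctx is an LLVMContext:
EVT VT = EVT::getVectorVT(Ctx, MVT::f32, 4);        // v4f32
VT.isVector();                                      // true
VT.is128BitVector();                                // true
VT.getVectorNumElements();                          // 4
VT.getScalarSizeInBits();                           // 32
EVT IntVT = VT.changeVectorElementTypeToInteger();  // v4i32
EVT HalfVT = VT.getHalfNumVectorElementsVT(Ctx);    // v2f32
VT.bitsGT(MVT::v2f32);                              // true: 128 bits > 64 bits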
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:290
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition: KnownBits.h:333
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:797
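KnownBits is the lattice used by computeKnownBitsForTargetNode and the generic known-bits queries. A small sketch of building and combining facts:
KnownBits K = KnownBits::makeConstant(APInt(8, 0x0F));  // all 8 bits known
KnownBits Z = K.zext(16);              // high 8 bits known zero
KnownBits S = K.sext(16);              // high bits copied from the sign bit
KnownBits Sum = KnownBits::add(Z, Z);  // bits known about Z + Z
KnownBits Meet = Z.intersectWith(S);   // facts true on both paths
bool KnowsSomething = !Meet.isUnknown();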
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueTypes that has been interned by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
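CallLoweringInfo is the builder filled in before LowerCallTo, e.g. when a node is expanded to a runtime call. A condensed sketch, assuming DAG, dl, Chain, Callee, RetTy, and Args (an ArgListTy) are already prepared inside a TargetLowering member:
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::ARM_AAPCS_VFP, RetTy, Callee, std::move(Args))
    .setDiscardResult(false);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// CallResult.first  = returned value
// CallResult.second = updated chain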
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)