1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
86#include "llvm/IR/Type.h"
87#include "llvm/IR/User.h"
88#include "llvm/IR/Value.h"
89#include "llvm/MC/MCInstrDesc.h"
91#include "llvm/MC/MCSchedule.h"
98#include "llvm/Support/Debug.h"
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <cstdlib>
110#include <iterator>
111#include <limits>
112#include <optional>
113#include <tuple>
114#include <utility>
115#include <vector>
116
117using namespace llvm;
118using namespace llvm::PatternMatch;
119
120#define DEBUG_TYPE "arm-isel"
121
122STATISTIC(NumTailCalls, "Number of tail calls");
123STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
124STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
125STATISTIC(NumConstpoolPromoted,
126 "Number of constants with their storage promoted into constant pools");
127
128static cl::opt<bool>
129ARMInterworking("arm-interworking", cl::Hidden,
130 cl::desc("Enable / disable ARM interworking (for debugging only)"),
131 cl::init(true));
132
134 "arm-promote-constant", cl::Hidden,
135 cl::desc("Enable / disable promotion of unnamed_addr constants into "
136 "constant pools"),
137 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139 "arm-promote-constant-max-size", cl::Hidden,
140 cl::desc("Maximum size of constant to promote into a constant pool"),
141 cl::init(64));
143 "arm-promote-constant-max-total", cl::Hidden,
144 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
145 cl::init(128));
146
148MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150 cl::init(2));
151
152/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153constexpr MVT FlagsVT = MVT::i32;
154
155// The APCS parameter registers.
156static const MCPhysReg GPRArgRegs[] = {
157 ARM::R0, ARM::R1, ARM::R2, ARM::R3
158};
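// Worked example (illustration only): both APCS and AAPCS pass the first
// argument words in r0-r3. For `int f(int a, long long b)`, AAPCS puts `a` in
// r0 and `b` in the aligned pair r2:r3, while APCS, which has no pair
// alignment rule, would use r1:r2 for `b`.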
159
161 SelectionDAG &DAG, const SDLoc &DL) {
163 assert(Arg.ArgVT.bitsLT(MVT::i32));
164 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
165 SDValue Ext =
167 MVT::i32, Trunc);
168 return Ext;
169}
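// A scalar sketch of the hardening above (illustration only): a narrow integer
// crossing a CMSE security boundary is re-truncated and re-extended locally
// instead of trusting the extension performed on the other side. Assuming an
// i8 value carried in 32 bits:
//
//   static uint32_t hardenI8(uint32_t Raw, bool IsSExt) {
//     uint8_t Narrow = static_cast<uint8_t>(Raw);              // ISD::TRUNCATE
//     return IsSExt ? static_cast<uint32_t>(static_cast<int8_t>(Narrow))
//                   : static_cast<uint32_t>(Narrow);           // SIGN_/ZERO_EXTEND
//   }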
170
171void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
172 if (VT != PromotedLdStVT) {
174 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
175
177 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
178 }
179
180 MVT ElemTy = VT.getVectorElementType();
181 if (ElemTy != MVT::f64)
185 if (ElemTy == MVT::i32) {
190 } else {
195 }
204 if (VT.isInteger()) {
208 }
209
210 // Neon does not support vector divide/remainder operations.
219
220 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
221 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
223 setOperationAction(Opcode, VT, Legal);
224 if (!VT.isFloatingPoint())
225 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
226 setOperationAction(Opcode, VT, Legal);
227}
228
229void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
230 addRegisterClass(VT, &ARM::DPRRegClass);
231 addTypeForNEON(VT, MVT::f64);
232}
233
234void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
235 addRegisterClass(VT, &ARM::DPairRegClass);
236 addTypeForNEON(VT, MVT::v2f64);
237}
238
239void ARMTargetLowering::setAllExpand(MVT VT) {
240 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
241 setOperationAction(Opc, VT, Expand);
242
243 // We support these really simple operations even on types where all
244 // the actual arithmetic has to be broken down into simpler
245 // operations or turned into library calls.
250}
251
252void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
253 LegalizeAction Action) {
254 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
255 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
256 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
257}
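// Usage note (illustration only): this helper marks all three extension kinds
// for one (result, memory) type pair at once, so a later call such as
// addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal) makes any-, zero- and
// sign-extending loads from <4 x i8> to <4 x i32> legal in a single line.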
258
259void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
260 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
261
262 for (auto VT : IntTypes) {
263 addRegisterClass(VT, &ARM::MQPRRegClass);
293
294 // No native support for these.
304
305 // Vector reductions
315
316 if (!HasMVEFP) {
321 } else {
324 }
325
326 // Pre and Post inc are supported on loads and stores
327 for (unsigned im = (unsigned)ISD::PRE_INC;
333 }
334 }
335
336 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
337 for (auto VT : FloatTypes) {
338 addRegisterClass(VT, &ARM::MQPRRegClass);
339 if (!HasMVEFP)
340 setAllExpand(VT);
341
342 // These are legal or custom whether we have MVE.fp or not
355
356 // Pre and Post inc are supported on loads and stores
357 for (unsigned im = (unsigned)ISD::PRE_INC;
363 }
364
365 if (HasMVEFP) {
373
374 // No native support for these.
389 }
390 }
391
392 // Custom-expand vector reductions on smaller-than-legal types to prevent
393 // false zero items being added.
402
403 // We 'support' these types up to bitcast/load/store level, regardless of
404 // MVE integer-only / float support. Only FP data processing on the FP
405 // vector types is inhibited at the integer-only level.
406 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
407 for (auto VT : LongTypes) {
408 addRegisterClass(VT, &ARM::MQPRRegClass);
409 setAllExpand(VT);
415 }
417
418 // We can do bitwise operations on v2i64 vectors
419 setOperationAction(ISD::AND, MVT::v2i64, Legal);
420 setOperationAction(ISD::OR, MVT::v2i64, Legal);
421 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
422
423 // It is legal to extload from v8i8 to v8i16, and from v4i8 or v4i16 to v4i32.
424 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
425 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
426 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
427
428 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
434
435 // Some truncating stores are legal too.
436 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
437 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
438 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
439
440 // Pre and Post inc on these are legal, given the correct extends
441 for (unsigned im = (unsigned)ISD::PRE_INC;
443 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
448 }
449 }
450
451 // Predicate types
452 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
453 for (auto VT : pTypes) {
454 addRegisterClass(VT, &ARM::VCCRRegClass);
469
470 if (!HasMVEFP) {
475 }
476 }
480 setOperationAction(ISD::OR, MVT::v2i1, Expand);
486
495}
496
498 const ARMSubtarget &STI)
499 : TargetLowering(TM), Subtarget(&STI) {
500 RegInfo = Subtarget->getRegisterInfo();
501 Itins = Subtarget->getInstrItineraryData();
502
505
506 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
507 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
508 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
509 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
510 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
511 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
513 }
514
515 if (Subtarget->isTargetMachO()) {
516 // Uses VFP for Thumb libfuncs if available.
517 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
518 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
519 static const struct {
520 const RTLIB::Libcall Op;
521 const char * const Name;
522 const ISD::CondCode Cond;
523 } LibraryCalls[] = {
524 // Single-precision floating-point arithmetic.
525 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
526 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
527 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
528 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
529
530 // Double-precision floating-point arithmetic.
531 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
532 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
533 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
534 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
535
536 // Single-precision comparisons.
537 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
538 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
539 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
540 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
541 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
542 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
543 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
544
545 // Double-precision comparisons.
546 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
547 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
548 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
549 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
550 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
551 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
552 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
553
554 // Floating-point to integer conversions.
555 // i64 conversions are done via library routines even when generating VFP
556 // instructions, so use the same ones.
557 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
558 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
559 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
560 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
561
562 // Conversions between floating types.
563 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
564 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
565
566 // Integer to floating-point conversions.
567 // i64 conversions are done via library routines even when generating VFP
568 // instructions, so use the same ones.
569 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
570 // e.g., __floatunsidf vs. __floatunssidfvfp.
571 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
572 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
573 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
574 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
575 };
576
577 for (const auto &LC : LibraryCalls) {
578 setLibcallName(LC.Op, LC.Name);
579 if (LC.Cond != ISD::SETCC_INVALID)
580 setCmpLibcallCC(LC.Op, LC.Cond);
581 }
582 }
583 }
584
585 // RTLIB
586 if (Subtarget->isAAPCS_ABI() &&
587 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
588 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
589 static const struct {
590 const RTLIB::Libcall Op;
591 const char * const Name;
592 const CallingConv::ID CC;
593 const ISD::CondCode Cond;
594 } LibraryCalls[] = {
595 // Double-precision floating-point arithmetic helper functions
596 // RTABI chapter 4.1.2, Table 2
597 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601
602 // Double-precision floating-point comparison helper functions
603 // RTABI chapter 4.1.2, Table 3
604 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
605 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
606 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
607 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
608 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
609 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
610 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
611
612 // Single-precision floating-point arithmetic helper functions
613 // RTABI chapter 4.1.2, Table 4
614 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618
619 // Single-precision floating-point comparison helper functions
620 // RTABI chapter 4.1.2, Table 5
621 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
622 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
623 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
624 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
625 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
626 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
627 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
628
629 // Floating-point to integer conversions.
630 // RTABI chapter 4.1.2, Table 6
631 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639
640 // Conversions between floating types.
641 // RTABI chapter 4.1.2, Table 7
642 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645
646 // Integer to floating-point conversions.
647 // RTABI chapter 4.1.2, Table 8
648 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656
657 // Long long helper functions
658 // RTABI chapter 4.2, Table 9
659 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663
664 // Integer division functions
665 // RTABI chapter 4.3.1
666 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
670 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
671 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
672 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
673 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
674 };
675
676 for (const auto &LC : LibraryCalls) {
677 setLibcallName(LC.Op, LC.Name);
678 setLibcallCallingConv(LC.Op, LC.CC);
679 if (LC.Cond != ISD::SETCC_INVALID)
680 setCmpLibcallCC(LC.Op, LC.Cond);
681 }
682
683 // EABI dependent RTLIB
684 if (TM.Options.EABIVersion == EABI::EABI4 ||
685 TM.Options.EABIVersion == EABI::EABI5) {
686 static const struct {
687 const RTLIB::Libcall Op;
688 const char *const Name;
689 const CallingConv::ID CC;
690 const ISD::CondCode Cond;
691 } MemOpsLibraryCalls[] = {
692 // Memory operations
693 // RTABI chapter 4.3.4
694 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
695 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
696 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
697 };
698
699 for (const auto &LC : MemOpsLibraryCalls) {
700 setLibcallName(LC.Op, LC.Name);
701 setLibcallCallingConv(LC.Op, LC.CC);
702 if (LC.Cond != ISD::SETCC_INVALID)
703 setCmpLibcallCC(LC.Op, LC.Cond);
704 }
705 }
706 }
707
708 if (Subtarget->isTargetWindows()) {
709 static const struct {
710 const RTLIB::Libcall Op;
711 const char * const Name;
712 const CallingConv::ID CC;
713 } LibraryCalls[] = {
714 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
717 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
718 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
719 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
720 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
721 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
722 };
723
724 for (const auto &LC : LibraryCalls) {
725 setLibcallName(LC.Op, LC.Name);
726 setLibcallCallingConv(LC.Op, LC.CC);
727 }
728 }
729
730 // Use divmod compiler-rt calls for iOS 5.0 and later.
731 if (Subtarget->isTargetMachO() &&
732 !(Subtarget->isTargetIOS() &&
733 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
734 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
735 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
736 }
737
738 // The half <-> float conversion functions are always soft-float on
739 // non-watchOS platforms, but are needed for some targets which use a
740 // hard-float calling convention by default.
741 if (!Subtarget->isTargetWatchABI()) {
742 if (Subtarget->isAAPCS_ABI()) {
743 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
744 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
745 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
746 } else {
747 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
748 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
749 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
750 }
751 }
752
753 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
754 // a __gnu_ prefix (which is the default).
755 if (Subtarget->isTargetAEABI()) {
756 static const struct {
757 const RTLIB::Libcall Op;
758 const char * const Name;
759 const CallingConv::ID CC;
760 } LibraryCalls[] = {
761 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
762 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
763 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
764 };
765
766 for (const auto &LC : LibraryCalls) {
767 setLibcallName(LC.Op, LC.Name);
768 setLibcallCallingConv(LC.Op, LC.CC);
769 }
770 }
771
772 if (Subtarget->isThumb1Only())
773 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
774 else
775 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
776
777 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
778 Subtarget->hasFPRegs()) {
779 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
780 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
781
786
787 if (!Subtarget->hasVFP2Base())
788 setAllExpand(MVT::f32);
789 if (!Subtarget->hasFP64())
790 setAllExpand(MVT::f64);
791 }
792
793 if (Subtarget->hasFullFP16()) {
794 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
797
800 }
801
802 if (Subtarget->hasBF16()) {
803 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
804 setAllExpand(MVT::bf16);
805 if (!Subtarget->hasFullFP16())
807 } else {
812 }
813
815 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
816 setTruncStoreAction(VT, InnerVT, Expand);
817 addAllExtLoads(VT, InnerVT, Expand);
818 }
819
822
824 }
825
828
831
832 if (Subtarget->hasMVEIntegerOps())
833 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
834
835 // Combine low-overhead loop intrinsics so that we can lower i1 types.
836 if (Subtarget->hasLOB()) {
838 }
839
840 if (Subtarget->hasNEON()) {
841 addDRTypeForNEON(MVT::v2f32);
842 addDRTypeForNEON(MVT::v8i8);
843 addDRTypeForNEON(MVT::v4i16);
844 addDRTypeForNEON(MVT::v2i32);
845 addDRTypeForNEON(MVT::v1i64);
846
847 addQRTypeForNEON(MVT::v4f32);
848 addQRTypeForNEON(MVT::v2f64);
849 addQRTypeForNEON(MVT::v16i8);
850 addQRTypeForNEON(MVT::v8i16);
851 addQRTypeForNEON(MVT::v4i32);
852 addQRTypeForNEON(MVT::v2i64);
853
854 if (Subtarget->hasFullFP16()) {
855 addQRTypeForNEON(MVT::v8f16);
856 addDRTypeForNEON(MVT::v4f16);
857 }
858
859 if (Subtarget->hasBF16()) {
860 addQRTypeForNEON(MVT::v8bf16);
861 addDRTypeForNEON(MVT::v4bf16);
862 }
863 }
864
865 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
866 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
867 // none of Neon, MVE or VFP supports any arithmetic operations on it.
868 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
869 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
870 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
871 // FIXME: Code duplication: FDIV and FREM are always expanded; see the
872 // ARMTargetLowering::addTypeForNEON method for details.
873 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
874 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
875 // FIXME: Create unittest.
876 // In other words, find a case where "copysign" appears in a DAG with vector
877 // operands.
879 // FIXME: Code duplication: SETCC has custom operation action, see
880 // ARMTargetLowering::addTypeForNEON method for details.
882 // FIXME: Create unittest for FNEG and for FABS.
883 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
884 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
886 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
887 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
888 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
889 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
890 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
893 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
896 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
902 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
903 }
904
905 if (Subtarget->hasNEON()) {
906 // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
907 // natively supported for v4f32.
909 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
910 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
911 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
912 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
913 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
916 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
924
925 // Mark v2f32 intrinsics.
927 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
928 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
929 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
930 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
931 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
934 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
942
943 // Neon does not support some operations on v1i64 and v2i64 types.
944 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
945 // Custom handling for some quad-vector types to detect VMULL.
946 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
947 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
948 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
949 // Custom handling for some vector types to avoid expensive expansions
950 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
952 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
954 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
955 // a destination type that is wider than the source, nor does
956 // it have a FP_TO_[SU]INT instruction with a narrower destination than
957 // source.
966
969
970 // NEON does not have single instruction CTPOP for vectors with element
971 // types wider than 8 bits. However, custom lowering can leverage the
972 // v8i8/v16i8 vcnt instruction.
979
980 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
981 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
982
983 // NEON does not have single instruction CTTZ for vectors.
985 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
986 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
987 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
988
989 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
990 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
991 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
992 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
993
998
1003
1007 }
1008
1009 // NEON only has FMA instructions as of VFP4.
1010 if (!Subtarget->hasVFP4Base()) {
1011 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
1012 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1013 }
1014
1017
1018 // It is legal to extload from v4i8 to v4i16 or v4i32.
1019 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1020 MVT::v2i32}) {
1025 }
1026 }
1027
1028 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1029 MVT::v4i32}) {
1034 }
1035 }
1036
1037 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1044 }
1045 if (Subtarget->hasMVEIntegerOps()) {
1048 ISD::SETCC});
1049 }
1050 if (Subtarget->hasMVEFloatOps()) {
1052 }
1053
1054 if (!Subtarget->hasFP64()) {
1055 // When targeting a floating-point unit with only single-precision
1056 // operations, f64 is legal for the few double-precision instructions which
1057 // are present. However, no double-precision operations other than moves,
1058 // loads and stores are provided by the hardware.
1096 }
1097
1098 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1101 if (Subtarget->hasFullFP16()) {
1104 }
1105 }
1106
1107 if (!Subtarget->hasFP16()) {
1110 }
1111
1113
1114 // ARM does not have floating-point extending loads.
1115 for (MVT VT : MVT::fp_valuetypes()) {
1116 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1117 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1118 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
1119 }
1120
1121 // ... or truncating stores
1122 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1123 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1124 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1125 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
1126 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
1127
1128 // ARM does not have i1 sign extending load.
1129 for (MVT VT : MVT::integer_valuetypes())
1130 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1131
1132 // ARM supports all 4 flavors of integer indexed load / store.
1133 if (!Subtarget->isThumb1Only()) {
1134 for (unsigned im = (unsigned)ISD::PRE_INC;
1136 setIndexedLoadAction(im, MVT::i1, Legal);
1137 setIndexedLoadAction(im, MVT::i8, Legal);
1138 setIndexedLoadAction(im, MVT::i16, Legal);
1139 setIndexedLoadAction(im, MVT::i32, Legal);
1140 setIndexedStoreAction(im, MVT::i1, Legal);
1141 setIndexedStoreAction(im, MVT::i8, Legal);
1142 setIndexedStoreAction(im, MVT::i16, Legal);
1143 setIndexedStoreAction(im, MVT::i32, Legal);
1144 }
1145 } else {
1146 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1149 }
1150
1155
1158 if (Subtarget->hasDSP()) {
1167 }
1168 if (Subtarget->hasBaseDSP()) {
1171 }
1172
1173 // i64 operation support.
1176 if (Subtarget->isThumb1Only()) {
1179 }
1180 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1181 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1183
1193
1194 // MVE lowers 64 bit shifts to lsll and lsrl
1195 // assuming that ISD::SRL and SRA of i64 are already marked custom
1196 if (Subtarget->hasMVEIntegerOps())
1198
1199 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1200 if (Subtarget->isThumb1Only()) {
1204 }
1205
1206 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1208
1209 // ARM does not have ROTL.
1214 }
1217 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1220 }
1221
1222 // @llvm.readcyclecounter requires the Performance Monitors extension.
1223 // Default to the 0 expansion on unsupported platforms.
1224 // FIXME: Technically there are older ARM CPUs that have
1225 // implementation-specific ways of obtaining this information.
1226 if (Subtarget->hasPerfMon())
1228
1229 // Only ARMv6 has BSWAP.
1230 if (!Subtarget->hasV6Ops())
1232
1233 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1234 : Subtarget->hasDivideInARMMode();
1235 if (!hasDivide) {
1236 // These are expanded into libcalls if the cpu doesn't have HW divider.
1239 }
1240
1241 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1244
1247 }
1248
1251
1252 // Register based DivRem for AEABI (RTABI 4.2)
1253 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1254 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1255 Subtarget->isTargetWindows()) {
1258 HasStandaloneRem = false;
1259
1260 if (Subtarget->isTargetWindows()) {
1261 const struct {
1262 const RTLIB::Libcall Op;
1263 const char * const Name;
1264 const CallingConv::ID CC;
1265 } LibraryCalls[] = {
1266 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1267 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1268 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1269 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1270
1271 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1272 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1273 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1274 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1275 };
1276
1277 for (const auto &LC : LibraryCalls) {
1278 setLibcallName(LC.Op, LC.Name);
1279 setLibcallCallingConv(LC.Op, LC.CC);
1280 }
1281 } else {
1282 const struct {
1283 const RTLIB::Libcall Op;
1284 const char * const Name;
1285 const CallingConv::ID CC;
1286 } LibraryCalls[] = {
1287 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1288 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1289 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1290 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1291
1292 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1293 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1294 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1295 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1296 };
1297
1298 for (const auto &LC : LibraryCalls) {
1299 setLibcallName(LC.Op, LC.Name);
1300 setLibcallCallingConv(LC.Op, LC.CC);
1301 }
1302 }
1303
1308 } else {
1311 }
1312
1317
1318 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1320
1321 // Use the default implementation.
1323 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1325 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1328
1329 if (Subtarget->isTargetWindows())
1331 else
1333
1334 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1335 // the default expansion.
1336 InsertFencesForAtomic = false;
1337 if (Subtarget->hasAnyDataBarrier() &&
1338 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1339 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1340 // to ldrex/strex loops already.
1342 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1344
1345 // On v8, we have particularly efficient implementations of atomic fences
1346 // if they can be combined with nearby atomic loads and stores.
1347 if (!Subtarget->hasAcquireRelease() ||
1348 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1349 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1350 InsertFencesForAtomic = true;
1351 }
1352 } else {
1353 // If there's anything we can use as a barrier, go through custom lowering
1354 // for ATOMIC_FENCE.
1355 // If the target has DMB in Thumb, fences can be inserted.
1356 if (Subtarget->hasDataBarrier())
1357 InsertFencesForAtomic = true;
1358
1360 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1361
1362 // Set them all for libcall, which will force libcalls.
1375 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1376 // Unordered/Monotonic case.
1377 if (!InsertFencesForAtomic) {
1380 }
1381 }
1382
1383 // Compute supported atomic widths.
1384 if (Subtarget->isTargetLinux() ||
1385 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1386 // For targets where __sync_* routines are reliably available, we use them
1387 // if necessary.
1388 //
1389 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1390 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1391 //
1392 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1393 // such targets should provide __sync_* routines, which use the ARM mode
1394 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1395 // encoding; see ARMISD::MEMBARRIER_MCR.)
1397 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1398 Subtarget->hasForced32BitAtomics()) {
1399 // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1401 } else {
1402 // We can't assume anything about other targets; just use libatomic
1403 // routines.
1405 }
1406
1408
1410
1411 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1412 if (!Subtarget->hasV6Ops()) {
1415 }
1417
1418 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1419 !Subtarget->isThumb1Only()) {
1420 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1421 // iff target supports vfp2.
1431 }
1432
1433 // We want to custom lower some of our intrinsics.
1438 if (Subtarget->useSjLjEH())
1439 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1440
1450 if (Subtarget->hasFullFP16()) {
1454 }
1455
1457
1460 if (Subtarget->hasFullFP16())
1464 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1465
1466 // We don't support sin/cos/fmod/copysign/pow
1475 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1476 !Subtarget->isThumb1Only()) {
1479 }
1482
1483 if (!Subtarget->hasVFP4Base()) {
1486 }
1487
1488 // Various VFP goodness
1489 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1490 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1491 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1494 }
1495
1496 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1497 if (!Subtarget->hasFP16()) {
1500 }
1501
1502 // Strict floating-point comparisons need custom lowering.
1509 }
1510
1511 // Use __sincos_stret if available.
1512 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1513 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1516 }
1517
1518 // FP-ARMv8 implements a lot of rounding-like FP operations.
1519 if (Subtarget->hasFPARMv8Base()) {
1528 if (Subtarget->hasNEON()) {
1533 }
1534
1535 if (Subtarget->hasFP64()) {
1544 }
1545 }
1546
1547 // FP16 operations often need to be promoted to call lib functions
1548 if (Subtarget->hasFullFP16()) {
1563
1565 }
1566
1567 if (Subtarget->hasNEON()) {
1568 // vmin and vmax aren't available in a scalar form, so we can use
1569 // a NEON instruction with an undef lane instead.
1578
1579 if (Subtarget->hasFullFP16()) {
1584
1589 }
1590 }
1591
1592 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1593 // it, but it's just a wrapper around ldexp.
1594 if (Subtarget->isTargetWindows()) {
1596 if (isOperationExpand(Op, MVT::f32))
1597 setOperationAction(Op, MVT::f32, Promote);
1598 }
1599
1600 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1601 // isn't legal.
1603 if (isOperationExpand(Op, MVT::f16))
1604 setOperationAction(Op, MVT::f16, Promote);
1605
1606 // We have target-specific dag combine patterns for the following nodes:
1607 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1610
1611 if (Subtarget->hasMVEIntegerOps())
1613
1614 if (Subtarget->hasV6Ops())
1616 if (Subtarget->isThumb1Only())
1618 // Attempt to lower smin/smax to ssat/usat
1619 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1620 Subtarget->isThumb2()) {
1622 }
1623
1625
1626 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1627 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1629 else
1631
1632 //// temporary - rewrite interface to use type
1635 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1637 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1639
1640 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1641 // are at least 4 bytes aligned.
1643
1644 // Prefer likely predicted branches to selects on out-of-order cores.
1645 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1646
1649 Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1650
1651 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1652}
1653
1655 return Subtarget->useSoftFloat();
1656}
1657
1658// FIXME: It might make sense to define the representative register class as the
1659// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1660// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1661// SPR's representative would be DPR_VFP2. This should work well if register
1662// pressure tracking were modified such that a register use would increment the
1663// pressure of the register class's representative and all of its super
1664// classes' representatives transitively. We have not implemented this because
1665// of the difficulty prior to coalescing of modeling operand register classes
1666// due to the common occurrence of cross class copies and subregister insertions
1667// and extractions.
1668std::pair<const TargetRegisterClass *, uint8_t>
1670 MVT VT) const {
1671 const TargetRegisterClass *RRC = nullptr;
1672 uint8_t Cost = 1;
1673 switch (VT.SimpleTy) {
1674 default:
1676 // Use DPR as representative register class for all floating point
1677 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1678 // the cost is 1 for both f32 and f64.
1679 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1680 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1681 RRC = &ARM::DPRRegClass;
1682 // When NEON is used for SP, only half of the register file is available
1683 // because operations that define both SP and DP results will be constrained
1684 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1685 // coalescing by double-counting the SP regs. See the FIXME above.
1686 if (Subtarget->useNEONForSinglePrecisionFP())
1687 Cost = 2;
1688 break;
1689 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1690 case MVT::v4f32: case MVT::v2f64:
1691 RRC = &ARM::DPRRegClass;
1692 Cost = 2;
1693 break;
1694 case MVT::v4i64:
1695 RRC = &ARM::DPRRegClass;
1696 Cost = 4;
1697 break;
1698 case MVT::v8i64:
1699 RRC = &ARM::DPRRegClass;
1700 Cost = 8;
1701 break;
1702 }
1703 return std::make_pair(RRC, Cost);
1704}
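// Worked example (illustration only): MVT::v4f32 maps to DPR with cost 2, so
// one such value is modelled as two D registers' worth of pressure on its
// representative class; v8i64 is modelled as eight.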
1705
1706const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1707#define MAKE_CASE(V) \
1708 case V: \
1709 return #V;
1710 switch ((ARMISD::NodeType)Opcode) {
1712 break;
1915#undef MAKE_CASE
1916 }
1917 return nullptr;
1918}
1919
1921 EVT VT) const {
1922 if (!VT.isVector())
1923 return getPointerTy(DL);
1924
1925 // MVE has a predicate register.
1926 if ((Subtarget->hasMVEIntegerOps() &&
1927 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1928 VT == MVT::v16i8)) ||
1929 (Subtarget->hasMVEFloatOps() &&
1930 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1931 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1933}
1934
1935/// getRegClassFor - Return the register class that should be used for the
1936/// specified value type.
1937const TargetRegisterClass *
1938ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1939 (void)isDivergent;
1940 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1941 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1942 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1943 // MVE Q registers.
1944 if (Subtarget->hasNEON()) {
1945 if (VT == MVT::v4i64)
1946 return &ARM::QQPRRegClass;
1947 if (VT == MVT::v8i64)
1948 return &ARM::QQQQPRRegClass;
1949 }
1950 if (Subtarget->hasMVEIntegerOps()) {
1951 if (VT == MVT::v4i64)
1952 return &ARM::MQQPRRegClass;
1953 if (VT == MVT::v8i64)
1954 return &ARM::MQQQQPRRegClass;
1955 }
1957}
1958
1959 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1960// source/dest is aligned and the copy size is large enough. We therefore want
1961// to align such objects passed to memory intrinsics.
1963 Align &PrefAlign) const {
1964 if (!isa<MemIntrinsic>(CI))
1965 return false;
1966 MinSize = 8;
1967 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1968 // cycle faster than 4-byte aligned LDM.
1969 PrefAlign =
1970 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1971 return true;
1972}
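// Worked example (illustration only): for a memcpy intrinsic on ARM11 or later
// (non-M) targets this requests Align(8) for objects of at least MinSize (8)
// bytes, so an LDM/STM-based expansion can use the faster 8-byte-aligned form;
// M-class targets keep Align(4).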
1973
1974// Create a fast isel object.
1975FastISel *
1977 const TargetLibraryInfo *libInfo) const {
1978 return ARM::createFastISel(funcInfo, libInfo);
1979}
1980
1982 unsigned NumVals = N->getNumValues();
1983 if (!NumVals)
1984 return Sched::RegPressure;
1985
1986 for (unsigned i = 0; i != NumVals; ++i) {
1987 EVT VT = N->getValueType(i);
1988 if (VT == MVT::Glue || VT == MVT::Other)
1989 continue;
1990 if (VT.isFloatingPoint() || VT.isVector())
1991 return Sched::ILP;
1992 }
1993
1994 if (!N->isMachineOpcode())
1995 return Sched::RegPressure;
1996
1997 // Loads are scheduled for latency even if the instruction itinerary
1998 // is not available.
1999 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2000 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
2001
2002 if (MCID.getNumDefs() == 0)
2003 return Sched::RegPressure;
2004 if (!Itins->isEmpty() &&
2005 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
2006 return Sched::ILP;
2007
2008 return Sched::RegPressure;
2009}
2010
2011//===----------------------------------------------------------------------===//
2012// Lowering Code
2013//===----------------------------------------------------------------------===//
2014
2015static bool isSRL16(const SDValue &Op) {
2016 if (Op.getOpcode() != ISD::SRL)
2017 return false;
2018 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2019 return Const->getZExtValue() == 16;
2020 return false;
2021}
2022
2023static bool isSRA16(const SDValue &Op) {
2024 if (Op.getOpcode() != ISD::SRA)
2025 return false;
2026 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2027 return Const->getZExtValue() == 16;
2028 return false;
2029}
2030
2031static bool isSHL16(const SDValue &Op) {
2032 if (Op.getOpcode() != ISD::SHL)
2033 return false;
2034 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2035 return Const->getZExtValue() == 16;
2036 return false;
2037}
2038
2039// Check for a signed 16-bit value. We special-case SRA because it keeps
2040// things simpler when also looking for SRAs that aren't sign extending a
2041// smaller value. Without the check, we'd need to take extra care with
2042// checking order for some operations.
2043static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2044 if (isSRA16(Op))
2045 return isSHL16(Op.getOperand(0));
2046 return DAG.ComputeNumSignBits(Op) == 17;
2047}
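// Worked example (illustration only): on an i32, 17 sign bits means the top 17
// bits are all copies of the sign, i.e. the value fits in a signed 16-bit
// range. 0x00007FFF (32767) has 17 sign bits; 0x00008000 (32768) has only 16.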
2048
2049/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2051 switch (CC) {
2052 default: llvm_unreachable("Unknown condition code!");
2053 case ISD::SETNE: return ARMCC::NE;
2054 case ISD::SETEQ: return ARMCC::EQ;
2055 case ISD::SETGT: return ARMCC::GT;
2056 case ISD::SETGE: return ARMCC::GE;
2057 case ISD::SETLT: return ARMCC::LT;
2058 case ISD::SETLE: return ARMCC::LE;
2059 case ISD::SETUGT: return ARMCC::HI;
2060 case ISD::SETUGE: return ARMCC::HS;
2061 case ISD::SETULT: return ARMCC::LO;
2062 case ISD::SETULE: return ARMCC::LS;
2063 }
2064}
2065
2066/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2068 ARMCC::CondCodes &CondCode2) {
2069 CondCode2 = ARMCC::AL;
2070 switch (CC) {
2071 default: llvm_unreachable("Unknown FP condition!");
2072 case ISD::SETEQ:
2073 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2074 case ISD::SETGT:
2075 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2076 case ISD::SETGE:
2077 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2078 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2079 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2080 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2081 case ISD::SETO: CondCode = ARMCC::VC; break;
2082 case ISD::SETUO: CondCode = ARMCC::VS; break;
2083 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2084 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2085 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2086 case ISD::SETLT:
2087 case ISD::SETULT: CondCode = ARMCC::LT; break;
2088 case ISD::SETLE:
2089 case ISD::SETULE: CondCode = ARMCC::LE; break;
2090 case ISD::SETNE:
2091 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2092 }
2093}
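// Worked example (illustration only): most FP conditions map to a single ARM
// condition, but SETONE ("ordered and unequal") needs two checks, MI or GT,
// and SETUEQ ("unordered or equal") needs EQ or VS, which is why CondCode2 is
// AL (i.e. unused) except for those two cases.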
2094
2095//===----------------------------------------------------------------------===//
2096// Calling Convention Implementation
2097//===----------------------------------------------------------------------===//
2098
2099/// getEffectiveCallingConv - Get the effective calling convention, taking into
2100/// account the presence of floating-point hardware and calling convention
2101/// limitations, such as support for variadic functions.
2103ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2104 bool isVarArg) const {
2105 switch (CC) {
2106 default:
2107 report_fatal_error("Unsupported calling convention");
2110 case CallingConv::GHC:
2112 return CC;
2118 case CallingConv::Swift:
2121 case CallingConv::C:
2122 case CallingConv::Tail:
2123 if (!Subtarget->isAAPCS_ABI())
2124 return CallingConv::ARM_APCS;
2125 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2126 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2127 !isVarArg)
2129 else
2131 case CallingConv::Fast:
2133 if (!Subtarget->isAAPCS_ABI()) {
2134 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2135 return CallingConv::Fast;
2136 return CallingConv::ARM_APCS;
2137 } else if (Subtarget->hasVFP2Base() &&
2138 !Subtarget->isThumb1Only() && !isVarArg)
2140 else
2142 }
2143}
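// Worked example (illustration only): a plain C call resolves to ARM_APCS on a
// pre-AAPCS target, to ARM_AAPCS_VFP on an AAPCS target built with a hard
// float ABI and FP registers, and to ARM_AAPCS otherwise; variadic calls never
// get the VFP variant because the variadic portion must follow the base AAPCS.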
2144
2146 bool isVarArg) const {
2147 return CCAssignFnForNode(CC, false, isVarArg);
2148}
2149
2151 bool isVarArg) const {
2152 return CCAssignFnForNode(CC, true, isVarArg);
2153}
2154
2155/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2156/// CallingConvention.
2157CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2158 bool Return,
2159 bool isVarArg) const {
2160 switch (getEffectiveCallingConv(CC, isVarArg)) {
2161 default:
2162 report_fatal_error("Unsupported calling convention");
2164 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2166 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2168 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2169 case CallingConv::Fast:
2170 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2171 case CallingConv::GHC:
2172 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2174 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2176 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2178 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2179 }
2180}
2181
2182SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2183 MVT LocVT, MVT ValVT, SDValue Val) const {
2184 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2185 Val);
2186 if (Subtarget->hasFullFP16()) {
2187 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2188 } else {
2189 Val = DAG.getNode(ISD::TRUNCATE, dl,
2190 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2191 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2192 }
2193 return Val;
2194}
2195
2196SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2197 MVT LocVT, MVT ValVT,
2198 SDValue Val) const {
2199 if (Subtarget->hasFullFP16()) {
2200 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2201 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2202 } else {
2203 Val = DAG.getNode(ISD::BITCAST, dl,
2204 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2205 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2206 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2207 }
2208 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2209}
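// Illustration of the two helpers above (sketch only): an f16 value travels in
// the low 16 bits of a 32-bit location, so MoveToHPR receives one by
// bitcasting to an integer and using VMOVhr (or truncate + bitcast without
// +fullfp16), and MoveFromHPR does the reverse when a value must be sent back
// out as i32/f32.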
2210
2211/// LowerCallResult - Lower the result values of a call into the
2212/// appropriate copies out of appropriate physical registers.
2213SDValue ARMTargetLowering::LowerCallResult(
2214 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2215 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2216 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2217 SDValue ThisVal, bool isCmseNSCall) const {
2218 // Assign locations to each value returned by this call.
2220 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2221 *DAG.getContext());
2222 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2223
2224 // Copy all of the result registers out of their specified physreg.
2225 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2226 CCValAssign VA = RVLocs[i];
2227
2228 // Pass 'this' value directly from the argument to return value, to avoid
2229 // reg unit interference
2230 if (i == 0 && isThisReturn) {
2231 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2232 "unexpected return calling convention register assignment");
2233 InVals.push_back(ThisVal);
2234 continue;
2235 }
2236
2237 SDValue Val;
2238 if (VA.needsCustom() &&
2239 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2240 // Handle f64 or half of a v2f64.
2241 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2242 InGlue);
2243 Chain = Lo.getValue(1);
2244 InGlue = Lo.getValue(2);
2245 VA = RVLocs[++i]; // skip ahead to next loc
2246 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2247 InGlue);
2248 Chain = Hi.getValue(1);
2249 InGlue = Hi.getValue(2);
2250 if (!Subtarget->isLittle())
2251 std::swap (Lo, Hi);
2252 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2253
2254 if (VA.getLocVT() == MVT::v2f64) {
2255 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2256 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2257 DAG.getConstant(0, dl, MVT::i32));
2258
2259 VA = RVLocs[++i]; // skip ahead to next loc
2260 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2261 Chain = Lo.getValue(1);
2262 InGlue = Lo.getValue(2);
2263 VA = RVLocs[++i]; // skip ahead to next loc
2264 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2265 Chain = Hi.getValue(1);
2266 InGlue = Hi.getValue(2);
2267 if (!Subtarget->isLittle())
2268 std::swap (Lo, Hi);
2269 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2270 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2271 DAG.getConstant(1, dl, MVT::i32));
2272 }
2273 } else {
2274 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2275 InGlue);
2276 Chain = Val.getValue(1);
2277 InGlue = Val.getValue(2);
2278 }
2279
2280 switch (VA.getLocInfo()) {
2281 default: llvm_unreachable("Unknown loc info!");
2282 case CCValAssign::Full: break;
2283 case CCValAssign::BCvt:
2284 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2285 break;
2286 }
2287
2288 // f16 arguments have their size extended to 4 bytes and passed as if they
2289 // had been copied to the LSBs of a 32-bit register.
2290 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2291 if (VA.needsCustom() &&
2292 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2293 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2294
2295 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2296 // is less than 32 bits must be sign- or zero-extended after the call for
2297 // security reasons. Although the ABI mandates an extension done by the
2298 // callee, the latter cannot be trusted to follow the rules of the ABI.
2299 const ISD::InputArg &Arg = Ins[VA.getValNo()];
2300 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
2301 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
2302 Val = handleCMSEValue(Val, Arg, DAG, dl);
2303
2304 InVals.push_back(Val);
2305 }
2306
2307 return Chain;
2308}
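// Worked example (illustration only): with the soft-float AAPCS an f64 result
// is returned split across two i32 registers; the loop above copies out both
// halves, swaps them on big-endian targets, and reassembles the double with
// ARMISD::VMOVDRR. A v2f64 result is handled as two such pairs inserted into
// the vector one element at a time.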
2309
2310std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2311 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2312 bool IsTailCall, int SPDiff) const {
2313 SDValue DstAddr;
2314 MachinePointerInfo DstInfo;
2315 int32_t Offset = VA.getLocMemOffset();
2317
2318 if (IsTailCall) {
2319 Offset += SPDiff;
2320 auto PtrVT = getPointerTy(DAG.getDataLayout());
2321 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2322 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2323 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2324 DstInfo =
2326 } else {
2327 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2328 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2329 StackPtr, PtrOff);
2330 DstInfo =
2332 }
2333
2334 return std::make_pair(DstAddr, DstInfo);
2335}
2336
2337// Returns the type of copying which is required to set up a byval argument to
2338// a tail-called function. This isn't needed for non-tail calls, because they
2339// always need the equivalent of CopyOnce, but tail-calls sometimes need two copies to
2340// avoid clobbering another argument (CopyViaTemp), and sometimes can be
2341// optimised to zero copies when forwarding an argument from the caller's
2342// caller (NoCopy).
2343ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
2344 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2347
2348 // Globals are always safe to copy from.
2349 if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
2350 return CopyOnce;
2351
2352 // Can only analyse frame index nodes, conservatively assume we need a
2353 // temporary.
2354 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2355 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2356 if (!SrcFrameIdxNode || !DstFrameIdxNode)
2357 return CopyViaTemp;
2358
2359 int SrcFI = SrcFrameIdxNode->getIndex();
2360 int DstFI = DstFrameIdxNode->getIndex();
2361 assert(MFI.isFixedObjectIndex(DstFI) &&
2362 "byval passed in non-fixed stack slot");
2363
2364 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2365 int64_t DstOffset = MFI.getObjectOffset(DstFI);
2366
2367 // If the source is in the local frame, then the copy to the argument memory
2368 // is always valid.
2369 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2370 if (!FixedSrc ||
2371 (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2372 return CopyOnce;
2373
2374 // In the case of byval arguments split between registers and the stack,
2375 // computeAddrForCallArg returns a FrameIndex which corresponds only to the
2376 // stack portion, but the Src SDValue will refer to the full value, including
2377 // the local stack memory that the register portion gets stored into. We only
2378 // need to compare them for equality, so normalise on the full value version.
2379 uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2380 DstOffset -= RegSize;
2381
2382 // If the value is already in the correct location, then no copying is
2383 // needed. If not, then we need to copy via a temporary.
2384 if (SrcOffset == DstOffset)
2385 return NoCopy;
2386 else
2387 return CopyViaTemp;
2388}
2389
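// Descriptive note (added for exposition): PassF64ArgInRegs splits an f64
// argument into two i32 halves with VMOVRRD. One half (which one depends on
// endianness) goes into VA's register; the other goes into NextVA's register
// if NextVA is a register location, otherwise it is stored to the stack slot
// computed by computeAddrForCallArg.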
2390void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2391 SDValue Chain, SDValue &Arg,
2392 RegsToPassVector &RegsToPass,
2393 CCValAssign &VA, CCValAssign &NextVA,
2394 SDValue &StackPtr,
2395 SmallVectorImpl<SDValue> &MemOpChains,
2396 bool IsTailCall,
2397 int SPDiff) const {
2398 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2399 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2400 unsigned id = Subtarget->isLittle() ? 0 : 1;
2401 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2402
2403 if (NextVA.isRegLoc())
2404 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2405 else {
2406 assert(NextVA.isMemLoc());
2407 if (!StackPtr.getNode())
2408 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2410
2411 SDValue DstAddr;
2412 MachinePointerInfo DstInfo;
2413 std::tie(DstAddr, DstInfo) =
2414 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2415 MemOpChains.push_back(
2416 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2417 }
2418}
2419
2420static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2421 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2423}
2424
2425/// LowerCall - Lowering a call into a callseq_start <-
2426/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2427/// nodes.
2428SDValue
2429ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2430 SmallVectorImpl<SDValue> &InVals) const {
2431 SelectionDAG &DAG = CLI.DAG;
2432 SDLoc &dl = CLI.DL;
2434 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2436 SDValue Chain = CLI.Chain;
2437 SDValue Callee = CLI.Callee;
2438 bool &isTailCall = CLI.IsTailCall;
2439 CallingConv::ID CallConv = CLI.CallConv;
2440 bool doesNotRet = CLI.DoesNotReturn;
2441 bool isVarArg = CLI.IsVarArg;
2442
2447 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2448 bool isThisReturn = false;
2449 bool isCmseNSCall = false;
2450 bool isSibCall = false;
2451 bool PreferIndirect = false;
2452 bool GuardWithBTI = false;
2453
2454 // Analyze operands of the call, assigning locations to each operand.
2456 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2457 *DAG.getContext());
2458 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2459
2460 // Lower 'returns_twice' calls to a pseudo-instruction.
2461 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2462 !Subtarget->noBTIAtReturnTwice())
2463 GuardWithBTI = AFI->branchTargetEnforcement();
2464
2465 // Determine whether this is a non-secure function call.
2466 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2467 isCmseNSCall = true;
2468
2469 // Disable tail calls if they're not supported.
2470 if (!Subtarget->supportsTailCall())
2471 isTailCall = false;
2472
2473 // For both the non-secure calls and the returns from a CMSE entry function,
2474 // the function needs to do some extra work after the call, or before the
2475 // return, respectively, so it cannot end with a tail call.
2476 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2477 isTailCall = false;
2478
2479 if (isa<GlobalAddressSDNode>(Callee)) {
2480 // If we're optimizing for minimum size and the function is called three or
2481 // more times in this block, we can improve codesize by calling indirectly
2482 // as BLXr has a 16-bit encoding.
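// Illustrative example (added for exposition): for three calls to the same
// function in one block, materialising the callee address into a register
// once and using the 16-bit "blx" register encoding for each call can be
// smaller than three 32-bit "bl" instructions, which is why PreferIndirect
// is set below.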
2483 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2484 if (CLI.CB) {
2485 auto *BB = CLI.CB->getParent();
2486 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2487 count_if(GV->users(), [&BB](const User *U) {
2488 return isa<Instruction>(U) &&
2489 cast<Instruction>(U)->getParent() == BB;
2490 }) > 2;
2491 }
2492 }
2493 if (isTailCall) {
2494 // Check if it's really possible to do a tail call.
2495 isTailCall =
2496 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2497
2498 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2499 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2500 isSibCall = true;
2501
2502 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2503 // detected sibcalls.
2504 if (isTailCall)
2505 ++NumTailCalls;
2506 }
2507
2508 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2509 report_fatal_error("failed to perform tail call elimination on a call "
2510 "site marked musttail");
2511
2512 // Get a count of how many bytes are to be pushed on the stack.
2513 unsigned NumBytes = CCInfo.getStackSize();
2514
2515 // SPDiff is the byte offset of the call's argument area from the callee's.
2516 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2517 // by this amount for a tail call. In a sibling call it must be 0 because the
2518 // caller will deallocate the entire stack and the callee still expects its
2519 // arguments to begin at SP+0. Completely unused for non-tail calls.
2520 int SPDiff = 0;
2521
2522 if (isTailCall && !isSibCall) {
2523 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2524 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2525
2526 // Since callee will pop argument stack as a tail call, we must keep the
2527 // popped size 16-byte aligned.
2528 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
2529 assert(StackAlign && "data layout string is missing stack alignment");
2530 NumBytes = alignTo(NumBytes, *StackAlign);
2531
2532 // SPDiff will be negative if this tail call requires more space than we
2533 // would automatically have in our incoming argument space. Positive if we
2534 // can actually shrink the stack.
2535 SPDiff = NumReusableBytes - NumBytes;
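// Illustrative numbers only (not from the source): if the caller received no
// stack arguments (NumReusableBytes == 0) but this tail call needs 16 bytes
// of stack arguments (NumBytes == 16 after alignment), SPDiff is -16 and the
// extra 16 bytes must be reserved below.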
2536
2537 // If this call requires more stack than we have available from
2538 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2539 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2540 AFI->setArgRegsSaveSize(-SPDiff);
2541 }
2542
2543 if (isSibCall) {
2544 // For sibling tail calls, memory operands are available in our caller's stack.
2545 NumBytes = 0;
2546 } else {
2547 // Adjust the stack pointer for the new arguments...
2548 // These operations are automatically eliminated by the prolog/epilog pass
2549 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2550 }
2551
2553 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2554
2555 RegsToPassVector RegsToPass;
2556 SmallVector<SDValue, 8> MemOpChains;
2557
2558 // If we are doing a tail-call, any byval arguments will be written to stack
2559 // space which was used for incoming arguments. If any of the values being used
2560 // are incoming byval arguments to this function, then they might be
2561 // overwritten by the stores of the outgoing arguments. To avoid this, we
2562 // need to make a temporary copy of them in local stack space, then copy back
2563 // to the argument area.
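// Hypothetical scenario (added for exposition): if incoming byval arguments A
// and B are forwarded to the tail-callee in swapped positions, storing A into
// B's slot would clobber B before it has been read, so such arguments are
// first copied into temporaries in the local frame.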
2564 DenseMap<unsigned, SDValue> ByValTemporaries;
2565 SDValue ByValTempChain;
2566 if (isTailCall) {
2567 SmallVector<SDValue, 8> ByValCopyChains;
2568 for (const CCValAssign &VA : ArgLocs) {
2569 unsigned ArgIdx = VA.getValNo();
2570 SDValue Src = OutVals[ArgIdx];
2571 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2572
2573 if (!Flags.isByVal())
2574 continue;
2575
2576 SDValue Dst;
2577 MachinePointerInfo DstInfo;
2578 std::tie(Dst, DstInfo) =
2579 computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
2580 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2581
2582 if (Copy == NoCopy) {
2583 // If the argument is already at the correct offset on the stack
2584 // (because we are forwarding a byval argument from our caller), we
2585 // don't need any copying.
2586 continue;
2587 } else if (Copy == CopyOnce) {
2588 // If the argument is in our local stack frame, no other argument
2589 // preparation can clobber it, so we can copy it to the final location
2590 // later.
2591 ByValTemporaries[ArgIdx] = Src;
2592 } else {
2593 assert(Copy == CopyViaTemp && "unexpected enum value");
2594 // If we might be copying this argument from the outgoing argument
2595 // stack area, we need to copy via a temporary in the local stack
2596 // frame.
2597 int TempFrameIdx = MFI.CreateStackObject(
2598 Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
2599 SDValue Temp =
2600 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2601
2602 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2603 SDValue AlignNode =
2604 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2605
2606 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2607 SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
2608 ByValCopyChains.push_back(
2609 DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
2610 ByValTemporaries[ArgIdx] = Temp;
2611 }
2612 }
2613 if (!ByValCopyChains.empty())
2614 ByValTempChain =
2615 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2616 }
2617
2618 // During a tail call, stores to the argument area must happen after all of
2619 // the function's incoming arguments have been loaded because they may alias.
2620 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2621 // there's no point in doing so repeatedly so this tracks whether that's
2622 // happened yet.
2623 bool AfterFormalArgLoads = false;
2624
2625 // Walk the register/memloc assignments, inserting copies/loads. In the case
2626 // of tail call optimization, arguments are handled later.
2627 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2628 i != e;
2629 ++i, ++realArgIdx) {
2630 CCValAssign &VA = ArgLocs[i];
2631 SDValue Arg = OutVals[realArgIdx];
2632 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2633 bool isByVal = Flags.isByVal();
2634
2635 // Promote the value if needed.
2636 switch (VA.getLocInfo()) {
2637 default: llvm_unreachable("Unknown loc info!");
2638 case CCValAssign::Full: break;
2639 case CCValAssign::SExt:
2640 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2641 break;
2642 case CCValAssign::ZExt:
2643 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2644 break;
2645 case CCValAssign::AExt:
2646 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2647 break;
2648 case CCValAssign::BCvt:
2649 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2650 break;
2651 }
2652
2653 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2654 Chain = DAG.getStackArgumentTokenFactor(Chain);
2655 if (ByValTempChain)
2656 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
2657 ByValTempChain);
2658 AfterFormalArgLoads = true;
2659 }
2660
2661 // f16 arguments have their size extended to 4 bytes and passed as if they
2662 // had been copied to the LSBs of a 32-bit register.
2663 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2664 if (VA.needsCustom() &&
2665 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2666 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2667 } else {
2668 // f16 arguments could have been extended prior to argument lowering.
2669 // Mask these arguments if this is a CMSE nonsecure call.
2670 auto ArgVT = Outs[realArgIdx].ArgVT;
2671 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2672 auto LocBits = VA.getLocVT().getSizeInBits();
2673 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2674 SDValue Mask =
2675 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2676 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2677 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2678 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2679 }
2680 }
2681
2682 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2683 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2684 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2685 DAG.getConstant(0, dl, MVT::i32));
2686 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2687 DAG.getConstant(1, dl, MVT::i32));
2688
2689 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2690 StackPtr, MemOpChains, isTailCall, SPDiff);
2691
2692 VA = ArgLocs[++i]; // skip ahead to next loc
2693 if (VA.isRegLoc()) {
2694 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2695 StackPtr, MemOpChains, isTailCall, SPDiff);
2696 } else {
2697 assert(VA.isMemLoc());
2698 SDValue DstAddr;
2699 MachinePointerInfo DstInfo;
2700 std::tie(DstAddr, DstInfo) =
2701 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2702 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2703 }
2704 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2705 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2706 StackPtr, MemOpChains, isTailCall, SPDiff);
2707 } else if (VA.isRegLoc()) {
2708 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2709 Outs[0].VT == MVT::i32) {
2710 assert(VA.getLocVT() == MVT::i32 &&
2711 "unexpected calling convention register assignment");
2712 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2713 "unexpected use of 'returned'");
2714 isThisReturn = true;
2715 }
2716 const TargetOptions &Options = DAG.getTarget().Options;
2717 if (Options.EmitCallSiteInfo)
2718 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2719 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2720 } else if (isByVal) {
2721 assert(VA.isMemLoc());
2722 unsigned offset = 0;
2723
2724 // True if this byval aggregate will be split between registers
2725 // and memory.
2726 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2727 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2728
2729 SDValue ByValSrc;
2730 bool NeedsStackCopy;
2731 if (ByValTemporaries.contains(realArgIdx)) {
2732 ByValSrc = ByValTemporaries[realArgIdx];
2733 NeedsStackCopy = true;
2734 } else {
2735 ByValSrc = Arg;
2736 NeedsStackCopy = !isTailCall;
2737 }
2738
2739 // If part of the argument is in registers, load them.
2740 if (CurByValIdx < ByValArgsCount) {
2741 unsigned RegBegin, RegEnd;
2742 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2743
2744 EVT PtrVT =
2746 unsigned int i, j;
2747 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2748 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2749 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const);
2750 SDValue Load =
2751 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2752 DAG.InferPtrAlign(AddArg));
2753 MemOpChains.push_back(Load.getValue(1));
2754 RegsToPass.push_back(std::make_pair(j, Load));
2755 }
2756
2757 // If the parameter size exceeds the register area, the "offset" value
2758 // helps us calculate the stack slot for the remaining part properly.
2759 offset = RegEnd - RegBegin;
2760
2761 CCInfo.nextInRegsParam();
2762 }
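// Worked example (illustrative only): a 20-byte byval whose register portion
// is r2-r3 gets two 4-byte loads feeding r2 and r3, "offset" becomes 2
// registers, and the remaining 12 bytes (20 - 4*2) are copied to the stack by
// the code below.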
2763
2764 // If the memory part of the argument isn't already in the correct place
2765 // (which can happen with tail calls), copy it into the argument area.
2766 if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
2767 auto PtrVT = getPointerTy(DAG.getDataLayout());
2768 SDValue Dst;
2769 MachinePointerInfo DstInfo;
2770 std::tie(Dst, DstInfo) =
2771 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2772 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2773 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset);
2774 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2775 MVT::i32);
2776 SDValue AlignNode =
2777 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2778
2779 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2780 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2781 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2782 Ops));
2783 }
2784 } else {
2785 assert(VA.isMemLoc());
2786 SDValue DstAddr;
2787 MachinePointerInfo DstInfo;
2788 std::tie(DstAddr, DstInfo) =
2789 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2790
2791 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2792 MemOpChains.push_back(Store);
2793 }
2794 }
2795
2796 if (!MemOpChains.empty())
2797 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2798
2799 // Build a sequence of copy-to-reg nodes chained together with token chain
2800 // and flag operands which copy the outgoing args into the appropriate regs.
2801 SDValue InGlue;
2802 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2803 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2804 RegsToPass[i].second, InGlue);
2805 InGlue = Chain.getValue(1);
2806 }
2807
2808 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2809 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2810 // node so that legalize doesn't hack it.
2811 bool isDirect = false;
2812
2814 const GlobalValue *GVal = nullptr;
2815 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2816 GVal = G->getGlobal();
2817 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2818
2819 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2820 bool isLocalARMFunc = false;
2821 auto PtrVt = getPointerTy(DAG.getDataLayout());
2822
2823 if (Subtarget->genLongCalls()) {
2824 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2825 "long-calls codegen is not position independent!");
2826 // Handle a global address or an external symbol. If it's not one of
2827 // those, the target's already in a register, so we don't need to do
2828 // anything extra.
2829 if (isa<GlobalAddressSDNode>(Callee)) {
2830 if (Subtarget->genExecuteOnly()) {
2831 if (Subtarget->useMovt())
2832 ++NumMovwMovt;
2833 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2834 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2835 } else {
2836 // Create a constant pool entry for the callee address
2837 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2839 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2840
2841 // Get the address of the callee into a register
2842 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2843 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2844 Callee = DAG.getLoad(
2845 PtrVt, dl, DAG.getEntryNode(), Addr,
2847 }
2848 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2849 const char *Sym = S->getSymbol();
2850
2851 if (Subtarget->genExecuteOnly()) {
2852 if (Subtarget->useMovt())
2853 ++NumMovwMovt;
2854 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2855 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2856 } else {
2857 // Create a constant pool entry for the callee address
2858 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2860 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2861
2862 // Get the address of the callee into a register
2863 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2864 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2865 Callee = DAG.getLoad(
2866 PtrVt, dl, DAG.getEntryNode(), Addr,
2868 }
2869 }
2870 } else if (isa<GlobalAddressSDNode>(Callee)) {
2871 if (!PreferIndirect) {
2872 isDirect = true;
2873 bool isDef = GVal->isStrongDefinitionForLinker();
2874
2875 // ARM call to a local ARM function is predicable.
2876 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2877 // tBX takes a register source operand.
2878 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2879 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2880 Callee = DAG.getNode(
2881 ARMISD::WrapperPIC, dl, PtrVt,
2882 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2883 Callee = DAG.getLoad(
2884 PtrVt, dl, DAG.getEntryNode(), Callee,
2888 } else if (Subtarget->isTargetCOFF()) {
2889 assert(Subtarget->isTargetWindows() &&
2890 "Windows is the only supported COFF target");
2891 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2892 if (GVal->hasDLLImportStorageClass())
2893 TargetFlags = ARMII::MO_DLLIMPORT;
2894 else if (!TM.shouldAssumeDSOLocal(GVal))
2895 TargetFlags = ARMII::MO_COFFSTUB;
2896 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2897 TargetFlags);
2898 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2899 Callee =
2900 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2901 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2903 } else {
2904 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2905 }
2906 }
2907 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2908 isDirect = true;
2909 // tBX takes a register source operand.
2910 const char *Sym = S->getSymbol();
2911 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2912 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2915 ARMPCLabelIndex, 4);
2916 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2917 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2918 Callee = DAG.getLoad(
2919 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2921 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2922 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2923 } else {
2924 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2925 }
2926 }
2927
2928 if (isCmseNSCall) {
2929 assert(!isARMFunc && !isDirect &&
2930 "Cannot handle call to ARM function or direct call");
2931 if (NumBytes > 0) {
2933 "call to non-secure function would "
2934 "require passing arguments on stack",
2935 dl.getDebugLoc());
2936 DAG.getContext()->diagnose(Diag);
2937 }
2938 if (isStructRet) {
2941 "call to non-secure function would return value through pointer",
2942 dl.getDebugLoc());
2943 DAG.getContext()->diagnose(Diag);
2944 }
2945 }
2946
2947 // FIXME: handle tail calls differently.
2948 unsigned CallOpc;
2949 if (Subtarget->isThumb()) {
2950 if (GuardWithBTI)
2951 CallOpc = ARMISD::t2CALL_BTI;
2952 else if (isCmseNSCall)
2953 CallOpc = ARMISD::tSECALL;
2954 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2955 CallOpc = ARMISD::CALL_NOLINK;
2956 else
2957 CallOpc = ARMISD::CALL;
2958 } else {
2959 if (!isDirect && !Subtarget->hasV5TOps())
2960 CallOpc = ARMISD::CALL_NOLINK;
2961 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2962 // Emit regular call when code size is the priority
2963 !Subtarget->hasMinSize())
2964 // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2965 CallOpc = ARMISD::CALL_NOLINK;
2966 else
2967 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2968 }
2969
2970 // We don't usually want to end the call-sequence here because we would tidy
2971 // the frame up *after* the call. However, in the ABI-changing tail-call case
2972 // we've carefully laid out the parameters so that when sp is reset they'll be
2973 // in the correct location.
2974 if (isTailCall && !isSibCall) {
2975 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2976 InGlue = Chain.getValue(1);
2977 }
2978
2979 std::vector<SDValue> Ops;
2980 Ops.push_back(Chain);
2981 Ops.push_back(Callee);
2982
2983 if (isTailCall) {
2984 Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32));
2985 }
2986
2987 // Add argument registers to the end of the list so that they are known live
2988 // into the call.
2989 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2990 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2991 RegsToPass[i].second.getValueType()));
2992
2993 // Add a register mask operand representing the call-preserved registers.
2994 const uint32_t *Mask;
2995 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2996 if (isThisReturn) {
2997 // For 'this' returns, use the R0-preserving mask if applicable
2998 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2999 if (!Mask) {
3000 // Set isThisReturn to false if the calling convention is not one that
3001 // allows 'returned' to be modeled in this way, so LowerCallResult does
3002 // not try to pass 'this' straight through
3003 isThisReturn = false;
3004 Mask = ARI->getCallPreservedMask(MF, CallConv);
3005 }
3006 } else
3007 Mask = ARI->getCallPreservedMask(MF, CallConv);
3008
3009 assert(Mask && "Missing call preserved mask for calling convention");
3010 Ops.push_back(DAG.getRegisterMask(Mask));
3011
3012 if (InGlue.getNode())
3013 Ops.push_back(InGlue);
3014
3015 if (isTailCall) {
3017 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
3018 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
3019 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
3020 return Ret;
3021 }
3022
3023 // Returns a chain and a flag for retval copy to use.
3024 Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
3025 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
3026 InGlue = Chain.getValue(1);
3027 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
3028
3029 // If we're guaranteeing tail-calls will be honoured, the callee must
3030 // pop its own argument stack on return. But this call is *not* a tail call so
3031 // we need to undo that after it returns to restore the status-quo.
3032 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
3033 uint64_t CalleePopBytes =
3034 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;
3035
3036 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
3037 if (!Ins.empty())
3038 InGlue = Chain.getValue(1);
3039
3040 // Handle result values, copying them out of physregs into vregs that we
3041 // return.
3042 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
3043 InVals, isThisReturn,
3044 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
3045}
3046
3047/// HandleByVal - Every parameter *after* a byval parameter is passed
3048/// on the stack. Remember the next parameter register to allocate,
3049 /// and then confiscate the rest of the parameter registers to ensure
3050/// this.
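/// For example (an illustration, not from the original comment): a 12-byte
/// byval with 8-byte alignment arriving when r1 is the next free register and
/// no stack arguments have been allocated yet wastes r1 for alignment, places
/// its first 8 bytes in r2-r3, and reduces Size to the 4 bytes left for the
/// stack.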
3051void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
3052 Align Alignment) const {
3053 // Byval (as with any stack) slots are always at least 4 byte aligned.
3054 Alignment = std::max(Alignment, Align(4));
3055
3057 if (!Reg)
3058 return;
3059
3060 unsigned AlignInRegs = Alignment.value() / 4;
3061 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
3062 for (unsigned i = 0; i < Waste; ++i)
3063 Reg = State->AllocateReg(GPRArgRegs);
3064
3065 if (!Reg)
3066 return;
3067
3068 unsigned Excess = 4 * (ARM::R4 - Reg);
3069
3070 // Special case when NSAA != SP and the parameter size is greater than the
3071 // size of all remaining GPR regs. In that case we can't split the parameter;
3072 // we must send it to the stack. We also must set NCRN to R4, so we waste all
3073 // remaining registers.
3074 const unsigned NSAAOffset = State->getStackSize();
3075 if (NSAAOffset != 0 && Size > Excess) {
3076 while (State->AllocateReg(GPRArgRegs))
3077 ;
3078 return;
3079 }
3080
3081 // The first register for the byval parameter is the first register that
3082 // wasn't allocated before this method call, so it would be "reg".
3083 // If the parameter is small enough to be saved in the range [reg, r4), then
3084 // the end (first-after-last) register would be reg + param-size-in-regs;
3085 // otherwise the parameter would be split between registers and the stack,
3086 // and the end register would be r4 in this case.
3087 unsigned ByValRegBegin = Reg;
3088 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
3089 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
3090 // Note, the first register is already allocated at the beginning of the
3091 // function, so allocate the remaining registers we need.
3092 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
3093 State->AllocateReg(GPRArgRegs);
3094 // A byval parameter that is split between registers and memory needs its
3095 // size truncated here.
3096 // In the case where the entire structure fits in registers, we set the
3097 // size in memory to zero.
3098 Size = std::max<int>(Size - Excess, 0);
3099}
3100
3101/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3102/// for tail call optimization. Targets which want to do tail call
3103/// optimization should implement this function. Note that this function also
3104/// processes musttail calls, so when this function returns false on a valid
3105/// musttail call, a fatal backend error occurs.
3106bool ARMTargetLowering::IsEligibleForTailCallOptimization(
3108 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
3109 CallingConv::ID CalleeCC = CLI.CallConv;
3110 SDValue Callee = CLI.Callee;
3111 bool isVarArg = CLI.IsVarArg;
3112 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3113 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3115 const SelectionDAG &DAG = CLI.DAG;
3117 const Function &CallerF = MF.getFunction();
3118 CallingConv::ID CallerCC = CallerF.getCallingConv();
3119
3120 assert(Subtarget->supportsTailCall());
3121
3122 // Indirect tail-calls require a register to hold the target address. That
3123 // register must be:
3124 // * Allocatable (i.e. r0-r7 if the target is Thumb1).
3125 // * Not callee-saved, so must be one of r0-r3 or r12.
3126 // * Not used to hold an argument to the tail-called function, which might be
3127 // in r0-r3.
3128 // * Not used to hold the return address authentication code, which is in r12
3129 // if enabled.
3130 // Sometimes, no register matches all of these conditions, so we can't do a
3131 // tail-call.
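// Illustrative example (added for exposition): a Thumb1 indirect tail call
// whose arguments occupy r0-r3 leaves no allocatable, non-callee-saved
// register free to hold the target address (r12 is excluded for Thumb1 here),
// so the tail call is rejected.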
3132 if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
3133 SmallSet<MCPhysReg, 5> AddressRegisters;
3134 for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
3135 AddressRegisters.insert(R);
3136 if (!(Subtarget->isThumb1Only() ||
3138 AddressRegisters.insert(ARM::R12);
3139 for (const CCValAssign &AL : ArgLocs)
3140 if (AL.isRegLoc())
3141 AddressRegisters.erase(AL.getLocReg());
3142 if (AddressRegisters.empty()) {
3143 LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");
3144 return false;
3145 }
3146 }
3147
3148 // Look for obvious safe cases to perform tail call optimization that do not
3149 // require ABI changes. This is what gcc calls sibcall.
3150
3151 // Exception-handling functions need a special set of instructions to indicate
3152 // a return to the hardware. Tail-calling another function would probably
3153 // break this.
3154 if (CallerF.hasFnAttribute("interrupt")) {
3155 LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
3156 return false;
3157 }
3158
3159 if (canGuaranteeTCO(CalleeCC,
3160 getTargetMachine().Options.GuaranteedTailCallOpt)) {
3161 LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
3162 << " (guaranteed tail-call CC)\n");
3163 return CalleeCC == CallerCC;
3164 }
3165
3166 // Also avoid sibcall optimization if either caller or callee uses struct
3167 // return semantics.
3168 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3169 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3170 if (isCalleeStructRet != isCallerStructRet) {
3171 LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
3172 return false;
3173 }
3174
3175 // Externally-defined functions with weak linkage should not be
3176 // tail-called on ARM when the OS does not support dynamic
3177 // pre-emption of symbols, as the AAELF spec requires normal calls
3178 // to undefined weak functions to be replaced with a NOP or jump to the
3179 // next instruction. The behaviour of branch instructions in this
3180 // situation (as used for tail calls) is implementation-defined, so we
3181 // cannot rely on the linker replacing the tail call with a return.
3182 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3183 const GlobalValue *GV = G->getGlobal();
3185 if (GV->hasExternalWeakLinkage() &&
3186 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
3187 TT.isOSBinFormatMachO())) {
3188 LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
3189 return false;
3190 }
3191 }
3192
3193 // Check that the call results are passed in the same way.
3194 LLVMContext &C = *DAG.getContext();
3196 getEffectiveCallingConv(CalleeCC, isVarArg),
3197 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3198 CCAssignFnForReturn(CalleeCC, isVarArg),
3199 CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
3200 LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
3201 return false;
3202 }
3203 // The callee has to preserve all registers the caller needs to preserve.
3204 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3205 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3206 if (CalleeCC != CallerCC) {
3207 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3208 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
3209 LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
3210 return false;
3211 }
3212 }
3213
3214 // If Caller's vararg argument has been split between registers and stack, do
3215 // not perform tail call, since part of the argument is in caller's local
3216 // frame.
3217 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3218 if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
3219 LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");
3220 return false;
3221 }
3222
3223 // If the callee takes no arguments then go on to check the results of the
3224 // call.
3225 const MachineRegisterInfo &MRI = MF.getRegInfo();
3226 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
3227 LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
3228 return false;
3229 }
3230
3231 // If the stack arguments for this call do not fit into our own save area then
3232 // the call cannot be made tail.
3233 if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
3234 return false;
3235
3236 LLVM_DEBUG(dbgs() << "true\n");
3237 return true;
3238}
3239
3240bool
3241ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3242 MachineFunction &MF, bool isVarArg,
3244 LLVMContext &Context) const {
3246 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3247 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3248}
3249
3251 const SDLoc &DL, SelectionDAG &DAG) {
3252 const MachineFunction &MF = DAG.getMachineFunction();
3253 const Function &F = MF.getFunction();
3254
3255 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3256
3257 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3258 // version of the "preferred return address". These offsets affect the return
3259 // instruction if this is a return from PL1 without hypervisor extensions.
3260 // IRQ/FIQ: +4 "subs pc, lr, #4"
3261 // SWI: 0 "subs pc, lr, #0"
3262 // ABORT: +4 "subs pc, lr, #4"
3263 // UNDEF: +4/+2 "subs pc, lr, #0"
3264 // UNDEF varies depending on whether the exception came from ARM or Thumb
3265 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3266
3267 int64_t LROffset;
3268 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3269 IntKind == "ABORT")
3270 LROffset = 4;
3271 else if (IntKind == "SWI" || IntKind == "UNDEF")
3272 LROffset = 0;
3273 else
3274 report_fatal_error("Unsupported interrupt attribute. If present, value "
3275 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3276
3277 RetOps.insert(RetOps.begin() + 1,
3278 DAG.getConstant(LROffset, DL, MVT::i32, false));
3279
3280 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3281}
3282
3283SDValue
3284ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3285 bool isVarArg,
3287 const SmallVectorImpl<SDValue> &OutVals,
3288 const SDLoc &dl, SelectionDAG &DAG) const {
3289 // CCValAssign - represent the assignment of the return value to a location.
3291
3292 // CCState - Info about the registers and stack slots.
3293 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3294 *DAG.getContext());
3295
3296 // Analyze outgoing return values.
3297 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3298
3299 SDValue Glue;
3301 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3302 bool isLittleEndian = Subtarget->isLittle();
3303
3306 AFI->setReturnRegsCount(RVLocs.size());
3307
3308 // Report error if cmse entry function returns structure through first ptr arg.
3309 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3310 // Note: using an empty SDLoc(), as the first line of the function is a
3311 // better place to report than the last line.
3314 "secure entry function would return value through pointer",
3315 SDLoc().getDebugLoc());
3316 DAG.getContext()->diagnose(Diag);
3317 }
3318
3319 // Copy the result values into the output registers.
3320 for (unsigned i = 0, realRVLocIdx = 0;
3321 i != RVLocs.size();
3322 ++i, ++realRVLocIdx) {
3323 CCValAssign &VA = RVLocs[i];
3324 assert(VA.isRegLoc() && "Can only return in registers!");
3325
3326 SDValue Arg = OutVals[realRVLocIdx];
3327 bool ReturnF16 = false;
3328
3329 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3330 // Half-precision return values can be returned like this:
3331 //
3332 // t11 f16 = fadd ...
3333 // t12: i16 = bitcast t11
3334 // t13: i32 = zero_extend t12
3335 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3336 //
3337 // to avoid code generation for bitcasts, we simply set Arg to the node
3338 // that produces the f16 value, t11 in this case.
3339 //
3340 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3341 SDValue ZE = Arg.getOperand(0);
3342 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3343 SDValue BC = ZE.getOperand(0);
3344 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3345 Arg = BC.getOperand(0);
3346 ReturnF16 = true;
3347 }
3348 }
3349 }
3350 }
3351
3352 switch (VA.getLocInfo()) {
3353 default: llvm_unreachable("Unknown loc info!");
3354 case CCValAssign::Full: break;
3355 case CCValAssign::BCvt:
3356 if (!ReturnF16)
3357 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3358 break;
3359 }
3360
3361 // Mask f16 arguments if this is a CMSE nonsecure entry.
3362 auto RetVT = Outs[realRVLocIdx].ArgVT;
3363 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3364 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3365 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3366 } else {
3367 auto LocBits = VA.getLocVT().getSizeInBits();
3368 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3369 SDValue Mask =
3370 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3371 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3372 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3373 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3374 }
3375 }
3376
3377 if (VA.needsCustom() &&
3378 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3379 if (VA.getLocVT() == MVT::v2f64) {
3380 // Extract the first half and return it in two registers.
3381 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3382 DAG.getConstant(0, dl, MVT::i32));
3383 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3384 DAG.getVTList(MVT::i32, MVT::i32), Half);
3385
3386 Chain =
3387 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3388 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3389 Glue = Chain.getValue(1);
3390 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3391 VA = RVLocs[++i]; // skip ahead to next loc
3392 Chain =
3393 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3394 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3395 Glue = Chain.getValue(1);
3396 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3397 VA = RVLocs[++i]; // skip ahead to next loc
3398
3399 // Extract the 2nd half and fall through to handle it as an f64 value.
3400 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3401 DAG.getConstant(1, dl, MVT::i32));
3402 }
3403 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3404 // available.
3405 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3406 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3407 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3408 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3409 Glue = Chain.getValue(1);
3410 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3411 VA = RVLocs[++i]; // skip ahead to next loc
3412 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3413 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3414 } else
3415 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3416
3417 // Guarantee that all emitted copies are
3418 // stuck together (glued), so that nothing can be scheduled between them.
3419 Glue = Chain.getValue(1);
3420 RetOps.push_back(DAG.getRegister(
3421 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3422 }
3423 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3424 const MCPhysReg *I =
3425 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3426 if (I) {
3427 for (; *I; ++I) {
3428 if (ARM::GPRRegClass.contains(*I))
3429 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3430 else if (ARM::DPRRegClass.contains(*I))
3432 else
3433 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3434 }
3435 }
3436
3437 // Update chain and glue.
3438 RetOps[0] = Chain;
3439 if (Glue.getNode())
3440 RetOps.push_back(Glue);
3441
3442 // CPUs which aren't M-class use a special sequence to return from
3443 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3444 // though we use "subs pc, lr, #N").
3445 //
3446 // M-class CPUs actually use a normal return sequence with a special
3447 // (hardware-provided) value in LR, so the normal code path works.
3448 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3449 !Subtarget->isMClass()) {
3450 if (Subtarget->isThumb1Only())
3451 report_fatal_error("interrupt attribute is not supported in Thumb1");
3452 return LowerInterruptReturn(RetOps, dl, DAG);
3453 }
3454
3457 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3458}
3459
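// Descriptive note (added for exposition): isUsedByReturnOnly returns true
// when the only use of N's value is to be copied into return registers,
// possibly through a VMOVRRD pair (f64 returned in two GPRs) or a BITCAST
// (f32 returned in one GPR), and updates Chain to the chain feeding those
// copies.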
3460bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3461 if (N->getNumValues() != 1)
3462 return false;
3463 if (!N->hasNUsesOfValue(1, 0))
3464 return false;
3465
3466 SDValue TCChain = Chain;
3467 SDNode *Copy = *N->user_begin();
3468 if (Copy->getOpcode() == ISD::CopyToReg) {
3469 // If the copy has a glue operand, we conservatively assume it isn't safe to
3470 // perform a tail call.
3471 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3472 return false;
3473 TCChain = Copy->getOperand(0);
3474 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3475 SDNode *VMov = Copy;
3476 // f64 returned in a pair of GPRs.
3478 for (SDNode *U : VMov->users()) {
3479 if (U->getOpcode() != ISD::CopyToReg)
3480 return false;
3481 Copies.insert(U);
3482 }
3483 if (Copies.size() > 2)
3484 return false;
3485
3486 for (SDNode *U : VMov->users()) {
3487 SDValue UseChain = U->getOperand(0);
3488 if (Copies.count(UseChain.getNode()))
3489 // Second CopyToReg
3490 Copy = U;
3491 else {
3492 // We are at the top of this chain.
3493 // If the copy has a glue operand, we conservatively assume it
3494 // isn't safe to perform a tail call.
3495 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3496 return false;
3497 // First CopyToReg
3498 TCChain = UseChain;
3499 }
3500 }
3501 } else if (Copy->getOpcode() == ISD::BITCAST) {
3502 // f32 returned in a single GPR.
3503 if (!Copy->hasOneUse())
3504 return false;
3505 Copy = *Copy->user_begin();
3506 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3507 return false;
3508 // If the copy has a glue operand, we conservatively assume it isn't safe to
3509 // perform a tail call.
3510 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3511 return false;
3512 TCChain = Copy->getOperand(0);
3513 } else {
3514 return false;
3515 }
3516
3517 bool HasRet = false;
3518 for (const SDNode *U : Copy->users()) {
3519 if (U->getOpcode() != ARMISD::RET_GLUE &&
3520 U->getOpcode() != ARMISD::INTRET_GLUE)
3521 return false;
3522 HasRet = true;
3523 }
3524
3525 if (!HasRet)
3526 return false;
3527
3528 Chain = TCChain;
3529 return true;
3530}
3531
3532bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3533 if (!Subtarget->supportsTailCall())
3534 return false;
3535
3536 if (!CI->isTailCall())
3537 return false;
3538
3539 return true;
3540}
3541
3542 // Trying to write a 64-bit value, so we need to split it into two 32-bit values
3543 // first, and pass the low and high parts through.
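// Illustrative IR (hypothetical register metadata):
//   call void @llvm.write_register.i64(metadata !0, i64 %v) ; !0 names a reg
// reaches here as an i64 WRITE_REGISTER and is rebuilt below with the low and
// high i32 halves of %v as separate operands.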
3545 SDLoc DL(Op);
3546 SDValue WriteValue = Op->getOperand(2);
3547
3548 // This function is only supposed to be called for i64 type argument.
3549 assert(WriteValue.getValueType() == MVT::i64
3550 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3551
3552 SDValue Lo, Hi;
3553 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3554 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3555 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3556}
3557
3558// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3559// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3560// one of the above mentioned nodes. It has to be wrapped because otherwise
3561// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3562 // be used to form an addressing mode. These wrapped nodes will be selected
3563// into MOVi.
3564SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3565 SelectionDAG &DAG) const {
3566 EVT PtrVT = Op.getValueType();
3567 // FIXME there is no actual debug info here
3568 SDLoc dl(Op);
3569 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3570 SDValue Res;
3571
3572 // When generating execute-only code Constant Pools must be promoted to the
3573 // global data section. It's a bit ugly that we can't share them across basic
3574 // blocks, but this way we guarantee that execute-only behaves correctly with
3575 // position-independent addressing modes.
3576 if (Subtarget->genExecuteOnly()) {
3577 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3578 auto T = const_cast<Type*>(CP->getType());
3579 auto C = const_cast<Constant*>(CP->getConstVal());
3580 auto M = const_cast<Module*>(DAG.getMachineFunction().
3582 auto GV = new GlobalVariable(
3583 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3586 Twine(AFI->createPICLabelUId())
3587 );
3588 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3589 dl, PtrVT);
3590 return LowerGlobalAddress(GA, DAG);
3591 }
3592
3593 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3594 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3595 Align CPAlign = CP->getAlign();
3596 if (Subtarget->isThumb1Only())
3597 CPAlign = std::max(CPAlign, Align(4));
3598 if (CP->isMachineConstantPoolEntry())
3599 Res =
3600 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3601 else
3602 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3603 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3604}
3605
3607 // If we don't have a 32-bit pc-relative branch instruction then the jump
3608 // table consists of block addresses. Usually this is inline, but for
3609 // execute-only it must be placed out-of-line.
3610 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3613}
3614
3615SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3616 SelectionDAG &DAG) const {
3619 unsigned ARMPCLabelIndex = 0;
3620 SDLoc DL(Op);
3621 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3622 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3623 SDValue CPAddr;
3624 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3625 if (!IsPositionIndependent) {
3626 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3627 } else {
3628 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3629 ARMPCLabelIndex = AFI->createPICLabelUId();
3631 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3632 ARMCP::CPBlockAddress, PCAdj);
3633 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3634 }
3635 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3636 SDValue Result = DAG.getLoad(
3637 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3639 if (!IsPositionIndependent)
3640 return Result;
3641 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3642 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3643}
3644
3645/// Convert a TLS address reference into the correct sequence of loads
3646/// and calls to compute the variable's address for Darwin, and return an
3647/// SDValue containing the final node.
3648
3649/// Darwin only has one TLS scheme which must be capable of dealing with the
3650/// fully general situation, in the worst case. This means:
3651/// + "extern __thread" declaration.
3652/// + Defined in a possibly unknown dynamic library.
3653///
3654/// The general system is that each __thread variable has a [3 x i32] descriptor
3655/// which contains information used by the runtime to calculate the address. The
3656/// only part of this the compiler needs to know about is the first word, which
3657/// contains a function pointer that must be called with the address of the
3658/// entire descriptor in "r0".
3659///
3660/// Since this descriptor may be in a different unit, in general access must
3661/// proceed along the usual ARM rules. A common sequence to produce is:
3662///
3663/// movw rT1, :lower16:_var$non_lazy_ptr
3664/// movt rT1, :upper16:_var$non_lazy_ptr
3665/// ldr r0, [rT1]
3666/// ldr rT2, [r0]
3667/// blx rT2
3668/// [...address now in r0...]
3669SDValue
3670ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3671 SelectionDAG &DAG) const {
3672 assert(Subtarget->isTargetDarwin() &&
3673 "This function expects a Darwin target");
3674 SDLoc DL(Op);
3675
3676 // The first step is to get the address of the actual global symbol. This is where
3677 // the TLS descriptor lives.
3678 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3679
3680 // The first entry in the descriptor is a function pointer that we must call
3681 // to obtain the address of the variable.
3682 SDValue Chain = DAG.getEntryNode();
3683 SDValue FuncTLVGet = DAG.getLoad(
3684 MVT::i32, DL, Chain, DescAddr,
3688 Chain = FuncTLVGet.getValue(1);
3689
3691 MachineFrameInfo &MFI = F.getFrameInfo();
3692 MFI.setAdjustsStack(true);
3693
3694 // TLS calls preserve all registers except those that absolutely must be
3695 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3696 // silly).
3697 auto TRI =
3699 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3701
3702 // Finally, we can make the call. This is just a degenerate version of a
3703 // normal ARM call node: r0 takes the address of the descriptor, and
3704 // returns the address of the variable in this thread.
3705 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3706 Chain =
3707 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3708 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3709 DAG.getRegisterMask(Mask), Chain.getValue(1));
3710 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3711}
3712
3713SDValue
3714ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3715 SelectionDAG &DAG) const {
3716 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3717
3718 SDValue Chain = DAG.getEntryNode();
3719 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3720 SDLoc DL(Op);
3721
3722 // Load the current TEB (thread environment block)
3723 SDValue Ops[] = {Chain,
3724 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3725 DAG.getTargetConstant(15, DL, MVT::i32),
3726 DAG.getTargetConstant(0, DL, MVT::i32),
3727 DAG.getTargetConstant(13, DL, MVT::i32),
3728 DAG.getTargetConstant(0, DL, MVT::i32),
3729 DAG.getTargetConstant(2, DL, MVT::i32)};
3730 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3731 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3732
3733 SDValue TEB = CurrentTEB.getValue(0);
3734 Chain = CurrentTEB.getValue(1);
3735
3736 // Load the ThreadLocalStoragePointer from the TEB
3737 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3738 SDValue TLSArray =
3739 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3740 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3741
3742 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3743 // offset into the TLSArray.
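// In C-like pseudocode, the address computed below (illustrative summary):
//   slot = *(TEB + 0x2c) + (*_tls_index) * 4;
//   var_addr = *slot + SECREL(var);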
3744
3745 // Load the TLS index from the C runtime
3746 SDValue TLSIndex =
3747 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3748 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3749 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3750
3751 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3752 DAG.getConstant(2, DL, MVT::i32));
3753 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3754 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3756
3757 // Get the offset of the start of the .tls section (section base)
3758 const auto *GA = cast<GlobalAddressSDNode>(Op);
3759 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3760 SDValue Offset = DAG.getLoad(
3761 PtrVT, DL, Chain,
3762 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3763 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3765
3766 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3767}
3768
3769// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3770SDValue
3771ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3772 SelectionDAG &DAG) const {
3773 SDLoc dl(GA);
3774 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3775 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3778 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3780 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3781 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3782 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3783 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3784 Argument = DAG.getLoad(
3785 PtrVT, dl, DAG.getEntryNode(), Argument,
3787 SDValue Chain = Argument.getValue(1);
3788
3789 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3790 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3791
3792 // call __tls_get_addr.
3794 ArgListEntry Entry;
3795 Entry.Node = Argument;
3796 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3797 Args.push_back(Entry);
3798
3799 // FIXME: is there useful debug info available here?
3801 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3803 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3804
3805 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3806 return CallResult.first;
3807}
3808
3809// Lower ISD::GlobalTLSAddress using the "initial exec" or
3810// "local exec" model.
3811SDValue
3812ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3813 SelectionDAG &DAG,
3814 TLSModel::Model model) const {
3815 const GlobalValue *GV = GA->getGlobal();
3816 SDLoc dl(GA);
3817 SDValue Offset;
3818 SDValue Chain = DAG.getEntryNode();
3819 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3820 // Get the Thread Pointer
3821 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3822
3823 if (model == TLSModel::InitialExec) {
3824 MachineFunction &MF = DAG.getMachineFunction();
3825 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3826 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3827 // Initial exec model.
3828 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3829 ARMConstantPoolValue *CPV =
3830 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3831 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3832 true);
3833 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3834 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3835 Offset = DAG.getLoad(
3836 PtrVT, dl, Chain, Offset,
3837 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3838 Chain = Offset.getValue(1);
3839
3840 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3841 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3842
3843 Offset = DAG.getLoad(
3844 PtrVT, dl, Chain, Offset,
3845 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3846 } else {
3847 // local exec model
3848 assert(model == TLSModel::LocalExec);
3849 ARMConstantPoolValue *CPV =
3850 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3851 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3852 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3853 Offset = DAG.getLoad(
3854 PtrVT, dl, Chain, Offset,
3855 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3856 }
3857
3858 // The address of the thread local variable is the add of the thread
3859 // pointer with the offset of the variable.
3860 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3861}
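// Both exec models compute thread_pointer + offset; they differ only in how
// the offset is obtained: initial-exec loads it at run time through the
// x(GOTTPOFF) constant-pool/GOT indirection built above, while local-exec
// folds it in as a link-time x(TPOFF) constant. Rough sketch (on targets with
// the hardware thread register; otherwise a call to __aeabi_read_tp):
//   mrc p15, #0, r0, c13, c0, #3   ; ARMISD::THREAD_POINTER
//   ldr r1, <offset of x>          ; GOTTPOFF load or TPOFF literal
//   add r0, r0, r1                 ; &x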
3862
3863SDValue
3864ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3865 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3866 if (DAG.getTarget().useEmulatedTLS())
3867 return LowerToTLSEmulatedModel(GA, DAG);
3868
3869 if (Subtarget->isTargetDarwin())
3870 return LowerGlobalTLSAddressDarwin(Op, DAG);
3871
3872 if (Subtarget->isTargetWindows())
3873 return LowerGlobalTLSAddressWindows(Op, DAG);
3874
3875 // TODO: implement the "local dynamic" model
3876 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3877 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3878
3879 switch (model) {
3880 case TLSModel::GeneralDynamic:
3881 case TLSModel::LocalDynamic:
3882 return LowerToTLSGeneralDynamicModel(GA, DAG);
3883 case TLSModel::InitialExec:
3884 case TLSModel::LocalExec:
3885 return LowerToTLSExecModels(GA, DAG, model);
3886 }
3887 llvm_unreachable("bogus TLS model");
3888}
3889
3890/// Return true if all users of V are within function F, looking through
3891/// ConstantExprs.
3892static bool allUsersAreInFunction(const Value *V, const Function *F) {
3893 SmallVector<const User*,4> Worklist(V->users());
3894 while (!Worklist.empty()) {
3895 auto *U = Worklist.pop_back_val();
3896 if (isa<ConstantExpr>(U)) {
3897 append_range(Worklist, U->users());
3898 continue;
3899 }
3900
3901 auto *I = dyn_cast<Instruction>(U);
3902 if (!I || I->getParent()->getParent() != F)
3903 return false;
3904 }
3905 return true;
3906}
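// Example: a use through a ConstantExpr, e.g. a store whose address operand is
// a constant "getelementptr inbounds (... @g, 0, 1)" written inline, is looked
// through: the GEP constant itself has no parent function, so its users are
// pushed onto the worklist and the store's enclosing function decides the
// answer.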
3907
3908static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3909 const GlobalValue *GV, SelectionDAG &DAG,
3910 EVT PtrVT, const SDLoc &dl) {
3911 // If we're creating a pool entry for a constant global with unnamed address,
3912 // and the global is small enough, we can emit it inline into the constant pool
3913 // to save ourselves an indirection.
3914 //
3915 // This is a win if the constant is only used in one function (so it doesn't
3916 // need to be duplicated) or duplicating the constant wouldn't increase code
3917 // size (implying the constant is no larger than 4 bytes).
3918 const Function &F = DAG.getMachineFunction().getFunction();
3919
3920 // We rely on this decision to inline being idempotent and unrelated to the
3921 // use-site. We know that if we inline a variable at one use site, we'll
3922 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3923 // doesn't know about this optimization, so bail out if it's enabled else
3924 // we could decide to inline here (and thus never emit the GV) but require
3925 // the GV from fast-isel generated code.
3926 if (!EnableConstpoolPromotion ||
3927 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3928 return SDValue();
3929
3930 auto *GVar = dyn_cast<GlobalVariable>(GV);
3931 if (!GVar || !GVar->hasInitializer() ||
3932 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3933 !GVar->hasLocalLinkage())
3934 return SDValue();
3935
3936 // If we inline a value that contains relocations, we move the relocations
3937 // from .data to .text. This is not allowed in position-independent code.
3938 auto *Init = GVar->getInitializer();
3939 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3940 Init->needsDynamicRelocation())
3941 return SDValue();
3942
3943 // The constant islands pass can only really deal with alignment requests
3944 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3945 // any type wanting greater alignment requirements than 4 bytes. We also
3946 // can only promote constants that are multiples of 4 bytes in size or
3947 // are paddable to a multiple of 4. Currently we only try and pad constants
3948 // that are strings for simplicity.
3949 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3950 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3951 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3952 unsigned RequiredPadding = 4 - (Size % 4);
3953 bool PaddingPossible =
3954 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3955 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3956 Size == 0)
3957 return SDValue();
3958
3959 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3960 MachineFunction &MF = DAG.getMachineFunction();
3961 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3962
3963 // We can't bloat the constant pool too much, else the ConstantIslands pass
3964 // may fail to converge. If we haven't promoted this global yet (it may have
3965 // multiple uses), and promoting it would increase the constant pool size (Sz
3966 // > 4), ensure we have space to do so up to MaxTotal.
3967 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3968 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3969 ConstpoolPromotionMaxTotal)
3970 return SDValue();
3971
3972 // This is only valid if all users are in a single function; we can't clone
3973 // the constant in general. The LLVM IR unnamed_addr allows merging
3974 // constants, but not cloning them.
3975 //
3976 // We could potentially allow cloning if we could prove all uses of the
3977 // constant in the current function don't care about the address, like
3978 // printf format strings. But that isn't implemented for now.
3979 if (!allUsersAreInFunction(GVar, &F))
3980 return SDValue();
3981
3982 // We're going to inline this global. Pad it out if needed.
3983 if (RequiredPadding != 4) {
3984 StringRef S = CDAInit->getAsString();
3985
3986 SmallVector<uint8_t,16> V(S.size());
3987 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3988 while (RequiredPadding--)
3989 V.push_back(0);
3990 Init = ConstantDataArray::get(*DAG.getContext(), V);
3991 }
3992
3993 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3994 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3995 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3996 AFI->markGlobalAsPromotedToConstantPool(GVar);
3997 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3998 PaddedSize - 4);
3999 }
4000 ++NumConstpoolPromoted;
4001 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4002}
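// A typical promotion candidate (illustrative) is a small local constant
// string such as
//   @.str = private unnamed_addr constant [6 x i8] c"hello\00"
// Its 6 bytes are padded to 8 and emitted directly into this function's
// constant pool, saving the usual indirection through the global's address.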
4003
4004static bool isReadOnly(const GlobalValue *GV) {
4005 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
4006 if (!(GV = GA->getAliaseeObject()))
4007 return false;
4008 if (const auto *V = dyn_cast<GlobalVariable>(GV))
4009 return V->isConstant();
4010 return isa<Function>(GV);
4011}
4012
4013SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
4014 SelectionDAG &DAG) const {
4015 switch (Subtarget->getTargetTriple().getObjectFormat()) {
4016 default: llvm_unreachable("unknown object format");
4017 case Triple::COFF:
4018 return LowerGlobalAddressWindows(Op, DAG);
4019 case Triple::ELF:
4020 return LowerGlobalAddressELF(Op, DAG);
4021 case Triple::MachO:
4022 return LowerGlobalAddressDarwin(Op, DAG);
4023 }
4024}
4025
4026SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
4027 SelectionDAG &DAG) const {
4028 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4029 SDLoc dl(Op);
4030 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4031 bool IsRO = isReadOnly(GV);
4032
4033 // promoteToConstantPool only if not generating XO text section
4034 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
4035 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
4036 return V;
4037
4038 if (isPositionIndependent()) {
4039 SDValue G = DAG.getTargetGlobalAddress(
4040 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
4041 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
4042 if (!GV->isDSOLocal())
4043 Result =
4044 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4045 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4046 return Result;
4047 } else if (Subtarget->isROPI() && IsRO) {
4048 // PC-relative.
4049 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
4050 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
4051 return Result;
4052 } else if (Subtarget->isRWPI() && !IsRO) {
4053 // SB-relative.
4054 SDValue RelAddr;
4055 if (Subtarget->useMovt()) {
4056 ++NumMovwMovt;
4057 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
4058 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
4059 } else { // use literal pool for address constant
4060 ARMConstantPoolValue *CPV =
4061 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
4062 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4063 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4064 RelAddr = DAG.getLoad(
4065 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4066 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4067 }
4068 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
4069 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
4070 return Result;
4071 }
4072
4073 // If we have T2 ops, we can materialize the address directly via movt/movw
4074 // pair. This is always cheaper. If need to generate Execute Only code, and we
4075 // only have Thumb1 available, we can't use a constant pool and are forced to
4076 // use immediate relocations.
4077 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
4078 if (Subtarget->useMovt())
4079 ++NumMovwMovt;
4080 // FIXME: Once remat is capable of dealing with instructions with register
4081 // operands, expand this into two nodes.
4082 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
4083 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
4084 } else {
4085 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
4086 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4087 return DAG.getLoad(
4088 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4089 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4090 }
4091}
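// The two non-PIC materializations above correspond roughly to:
//   movw r0, :lower16:g        @ with useMovt()
//   movt r0, :upper16:g
// versus a literal-pool load when MOVW/MOVT are unavailable:
//   ldr  r0, .LCPI0_0          @ .LCPI0_0: .long g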
4092
4093SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
4094 SelectionDAG &DAG) const {
4095 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4096 "ROPI/RWPI not currently supported for Darwin");
4097 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4098 SDLoc dl(Op);
4099 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4100
4101 if (Subtarget->useMovt())
4102 ++NumMovwMovt;
4103
4104 // FIXME: Once remat is capable of dealing with instructions with register
4105 // operands, expand this into multiple nodes
4106 unsigned Wrapper =
4107 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
4108
4109 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
4110 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
4111
4112 if (Subtarget->isGVIndirectSymbol(GV))
4113 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4114 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4115 return Result;
4116}
4117
4118SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4119 SelectionDAG &DAG) const {
4120 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4121 assert(Subtarget->useMovt() &&
4122 "Windows on ARM expects to use movw/movt");
4123 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4124 "ROPI/RWPI not currently supported for Windows");
4125
4126 const TargetMachine &TM = getTargetMachine();
4127 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4128 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4129 if (GV->hasDLLImportStorageClass())
4130 TargetFlags = ARMII::MO_DLLIMPORT;
4131 else if (!TM.shouldAssumeDSOLocal(GV))
4132 TargetFlags = ARMII::MO_COFFSTUB;
4133 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4134 SDValue Result;
4135 SDLoc DL(Op);
4136
4137 ++NumMovwMovt;
4138
4139 // FIXME: Once remat is capable of dealing with instructions with register
4140 // operands, expand this into two nodes.
4141 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4142 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4143 TargetFlags));
4144 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4145 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4146 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4147 return Result;
4148}
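// For a dllimport'ed global the address is materialized via its import-table
// pointer, roughly:
//   movw r0, :lower16:__imp_g
//   movt r0, :upper16:__imp_g
//   ldr  r0, [r0]              @ the load added above for MO_DLLIMPORT
// Non-DSO-local globals go through a COFF stub (MO_COFFSTUB) in the same way.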
4149
4150SDValue
4151ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4152 SDLoc dl(Op);
4153 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4154 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4155 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4156 Op.getOperand(1), Val);
4157}
4158
4159SDValue
4160ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4161 SDLoc dl(Op);
4162 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4163 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4164}
4165
4166SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4167 SelectionDAG &DAG) const {
4168 SDLoc dl(Op);
4169 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4170 Op.getOperand(0));
4171}
4172
4173SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4174 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4175 unsigned IntNo =
4176 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4177 switch (IntNo) {
4178 default:
4179 return SDValue(); // Don't custom lower most intrinsics.
4180 case Intrinsic::arm_gnu_eabi_mcount: {
4181 MachineFunction &MF = DAG.getMachineFunction();
4182 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4183 SDLoc dl(Op);
4184 SDValue Chain = Op.getOperand(0);
4185 // call "\01__gnu_mcount_nc"
4186 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4187 const uint32_t *Mask =
4188 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4189 assert(Mask && "Missing call preserved mask for calling convention");
4190 // Mark LR an implicit live-in.
4191 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4192 SDValue ReturnAddress =
4193 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4194 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4195 SDValue Callee =
4196 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4197 SDValue RegisterMask = DAG.getRegisterMask(Mask);
4198 if (Subtarget->isThumb())
4199 return SDValue(
4200 DAG.getMachineNode(
4201 ARM::tBL_PUSHLR, dl, ResultTys,
4202 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4203 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4204 0);
4205 return SDValue(
4206 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4207 {ReturnAddress, Callee, RegisterMask, Chain}),
4208 0);
4209 }
4210 }
4211}
4212
4213SDValue
4214ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4215 const ARMSubtarget *Subtarget) const {
4216 unsigned IntNo = Op.getConstantOperandVal(0);
4217 SDLoc dl(Op);
4218 switch (IntNo) {
4219 default: return SDValue(); // Don't custom lower most intrinsics.
4220 case Intrinsic::thread_pointer: {
4221 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4222 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4223 }
4224 case Intrinsic::arm_cls: {
4225 const SDValue &Operand = Op.getOperand(1);
4226 const EVT VTy = Op.getValueType();
4227 SDValue SRA =
4228 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4229 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4230 SDValue SHL =
4231 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4232 SDValue OR =
4233 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4234 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4235 return Result;
4236 }
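// Worked example of the expansion above, for x = 0xFFFFFFF0:
//   x >> 31 = 0xFFFFFFFF, x ^ (x >> 31) = 0x0000000F,
//   (0x0000000F << 1) | 1 = 0x1F, ctlz(0x1F) = 27 = cls(x)
// (27 copies of the sign bit follow bit 31).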
4237 case Intrinsic::arm_cls64: {
4238 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4239 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4240 const SDValue &Operand = Op.getOperand(1);
4241 const EVT VTy = Op.getValueType();
4242 SDValue Lo, Hi;
4243 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4244 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4245 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4246 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4247 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4248 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4249 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4250 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4251 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4252 SDValue CheckLo =
4253 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4254 SDValue HiIsZero =
4255 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4256 SDValue AdjustedLo =
4257 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4258 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4259 SDValue Result =
4260 DAG.getSelect(dl, VTy, CheckLo,
4261 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4262 return Result;
4263 }
4264 case Intrinsic::eh_sjlj_lsda: {
4265 MachineFunction &MF = DAG.getMachineFunction();
4266 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4267 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4268 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4269 SDValue CPAddr;
4270 bool IsPositionIndependent = isPositionIndependent();
4271 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4272 ARMConstantPoolValue *CPV =
4273 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4274 ARMCP::CPLSDA, PCAdj);
4275 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4276 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4277 SDValue Result = DAG.getLoad(
4278 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4279 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4280
4281 if (IsPositionIndependent) {
4282 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4283 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4284 }
4285 return Result;
4286 }
4287 case Intrinsic::arm_neon_vabs:
4288 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4289 Op.getOperand(1));
4290 case Intrinsic::arm_neon_vabds:
4291 if (Op.getValueType().isInteger())
4292 return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
4293 Op.getOperand(1), Op.getOperand(2));
4294 return SDValue();
4295 case Intrinsic::arm_neon_vabdu:
4296 return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
4297 Op.getOperand(1), Op.getOperand(2));
4298 case Intrinsic::arm_neon_vmulls:
4299 case Intrinsic::arm_neon_vmullu: {
4300 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4301 ? ARMISD::VMULLs : ARMISD::VMULLu;
4302 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4303 Op.getOperand(1), Op.getOperand(2));
4304 }
4305 case Intrinsic::arm_neon_vminnm:
4306 case Intrinsic::arm_neon_vmaxnm: {
4307 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4308 ? ISD::FMINNUM : ISD::FMAXNUM;
4309 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4310 Op.getOperand(1), Op.getOperand(2));
4311 }
4312 case Intrinsic::arm_neon_vminu:
4313 case Intrinsic::arm_neon_vmaxu: {
4314 if (Op.getValueType().isFloatingPoint())
4315 return SDValue();
4316 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4317 ? ISD::UMIN : ISD::UMAX;
4318 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4319 Op.getOperand(1), Op.getOperand(2));
4320 }
4321 case Intrinsic::arm_neon_vmins:
4322 case Intrinsic::arm_neon_vmaxs: {
4323 // v{min,max}s is overloaded between signed integers and floats.
4324 if (!Op.getValueType().isFloatingPoint()) {
4325 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4326 ? ISD::SMIN : ISD::SMAX;
4327 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4328 Op.getOperand(1), Op.getOperand(2));
4329 }
4330 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4331 ? ISD::FMINIMUM : ISD::FMAXIMUM;
4332 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4333 Op.getOperand(1), Op.getOperand(2));
4334 }
4335 case Intrinsic::arm_neon_vtbl1:
4336 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4337 Op.getOperand(1), Op.getOperand(2));
4338 case Intrinsic::arm_neon_vtbl2:
4339 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4340 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4341 case Intrinsic::arm_mve_pred_i2v:
4342 case Intrinsic::arm_mve_pred_v2i:
4343 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4344 Op.getOperand(1));
4345 case Intrinsic::arm_mve_vreinterpretq:
4346 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4347 Op.getOperand(1));
4348 case Intrinsic::arm_mve_lsll:
4349 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4350 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4351 case Intrinsic::arm_mve_asrl:
4352 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4353 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4354 }
4355}
4356
4357static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4358 const ARMSubtarget *Subtarget) {
4359 SDLoc dl(Op);
4360 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4361 if (SSID == SyncScope::SingleThread)
4362 return Op;
4363
4364 if (!Subtarget->hasDataBarrier()) {
4365 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4366 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4367 // here.
4368 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4369 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4370 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4371 DAG.getConstant(0, dl, MVT::i32));
4372 }
4373
4374 AtomicOrdering Ord =
4375 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4376 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
4377 if (Subtarget->isMClass()) {
4378 // Only a full system barrier exists in the M-class architectures.
4379 Domain = ARM_MB::SY;
4380 } else if (Subtarget->preferISHSTBarriers() &&
4381 Ord == AtomicOrdering::Release) {
4382 // Swift happens to implement ISHST barriers in a way that's compatible with
4383 // Release semantics but weaker than ISH so we'd be fools not to use
4384 // it. Beware: other processors probably don't!
4385 Domain = ARM_MB::ISHST;
4386 }
4387
4388 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4389 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4390 DAG.getConstant(Domain, dl, MVT::i32));
4391}
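// e.g. a seq_cst fence becomes "dmb ish" here ("dmb sy" on M-class, which only
// has the full-system barrier), and a release fence on subtargets that prefer
// ISHST barriers (e.g. Swift) becomes "dmb ishst" via the special case above.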
4392
4393static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
4394 const ARMSubtarget *Subtarget) {
4395 // ARM pre v5TE and Thumb1 does not have preload instructions.
4396 if (!(Subtarget->isThumb2() ||
4397 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4398 // Just preserve the chain.
4399 return Op.getOperand(0);
4400
4401 SDLoc dl(Op);
4402 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4403 if (!isRead &&
4404 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4405 // ARMv7 with MP extension has PLDW.
4406 return Op.getOperand(0);
4407
4408 unsigned isData = Op.getConstantOperandVal(4);
4409 if (Subtarget->isThumb()) {
4410 // Invert the bits.
4411 isRead = ~isRead & 1;
4412 isData = ~isData & 1;
4413 }
4414
4415 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4416 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4417 DAG.getConstant(isData, dl, MVT::i32));
4418}
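// Mapping of the intrinsic operands handled above (sketch):
//   llvm.prefetch(p, /*rw=*/0, loc, /*cache=*/1)  ->  pld  [p]
//   llvm.prefetch(p, /*rw=*/1, loc, /*cache=*/1)  ->  pldw [p]   (ARMv7 + MP only)
//   llvm.prefetch(p, /*rw=*/0, loc, /*cache=*/0)  ->  pli  [p]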
4419
4420static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
4421 MachineFunction &MF = DAG.getMachineFunction();
4422 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4423
4424 // vastart just stores the address of the VarArgsFrameIndex slot into the
4425 // memory location argument.
4426 SDLoc dl(Op);
4427 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4428 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4429 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4430 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4431 MachinePointerInfo(SV));
4432}
4433
4434SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4435 CCValAssign &NextVA,
4436 SDValue &Root,
4437 SelectionDAG &DAG,
4438 const SDLoc &dl) const {
4439 MachineFunction &MF = DAG.getMachineFunction();
4440 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4441
4442 const TargetRegisterClass *RC;
4443 if (AFI->isThumb1OnlyFunction())
4444 RC = &ARM::tGPRRegClass;
4445 else
4446 RC = &ARM::GPRRegClass;
4447
4448 // Transform the arguments stored in physical registers into virtual ones.
4449 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4450 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4451
4452 SDValue ArgValue2;
4453 if (NextVA.isMemLoc()) {
4454 MachineFrameInfo &MFI = MF.getFrameInfo();
4455 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4456
4457 // Create load node to retrieve arguments from the stack.
4458 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4459 ArgValue2 = DAG.getLoad(
4460 MVT::i32, dl, Root, FIN,
4461 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4462 } else {
4463 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4464 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4465 }
4466 if (!Subtarget->isLittle())
4467 std::swap (ArgValue, ArgValue2);
4468 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4469}
4470
4471// The remaining GPRs hold either the beginning of variable-argument
4472// data, or the beginning of an aggregate passed by value (usually
4473// byval). Either way, we allocate stack slots adjacent to the data
4474// provided by our caller, and store the unallocated registers there.
4475// If this is a variadic function, the va_list pointer will begin with
4476// these values; otherwise, this reassembles a (byval) structure that
4477// was split between registers and memory.
4478// Return: The frame index registers were stored into.
4479int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4480 const SDLoc &dl, SDValue &Chain,
4481 const Value *OrigArg,
4482 unsigned InRegsParamRecordIdx,
4483 int ArgOffset, unsigned ArgSize) const {
4484 // Currently, two use-cases possible:
4485 // Case #1. Non-var-args function, and we meet first byval parameter.
4486 // Setup first unallocated register as first byval register;
4487 // eat all remained registers
4488 // (these two actions are performed by HandleByVal method).
4489 // Then, here, we initialize stack frame with
4490 // "store-reg" instructions.
4491 // Case #2. Var-args function, that doesn't contain byval parameters.
4492 // The same: eat all remained unallocated registers,
4493 // initialize stack frame.
4494
4495 MachineFunction &MF = DAG.getMachineFunction();
4496 MachineFrameInfo &MFI = MF.getFrameInfo();
4497 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4498 unsigned RBegin, REnd;
4499 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4500 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4501 } else {
4502 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4503 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4504 REnd = ARM::R4;
4505 }
4506
4507 if (REnd != RBegin)
4508 ArgOffset = -4 * (ARM::R4 - RBegin);
4509
4510 auto PtrVT = getPointerTy(DAG.getDataLayout());
4511 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4512 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4513
4514 SmallVector<SDValue, 4> MemOps;
4515 const TargetRegisterClass *RC =
4516 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4517
4518 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4519 Register VReg = MF.addLiveIn(Reg, RC);
4520 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4521 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4522 MachinePointerInfo(OrigArg, 4 * i));
4523 MemOps.push_back(Store);
4524 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4525 }
4526
4527 if (!MemOps.empty())
4528 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4529 return FrameIndex;
4530}
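// Example: for "void f(int a, ...)" a arrives in r0 and r1-r3 are still
// unallocated, so RBegin/REnd select r1..r3 and the loop above stores them
// into a fixed stack object directly below the caller-provided arguments;
// va_arg can then walk the combined area as contiguous memory.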
4531
4532// Setup stack frame, the va_list pointer will start from.
4533void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4534 const SDLoc &dl, SDValue &Chain,
4535 unsigned ArgOffset,
4536 unsigned TotalArgRegsSaveSize,
4537 bool ForceMutable) const {
4538 MachineFunction &MF = DAG.getMachineFunction();
4539 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4540
4541 // Try to store any remaining integer argument regs
4542 // to their spots on the stack so that they may be loaded by dereferencing
4543 // the result of va_next.
4544 // If there is no regs to be stored, just point address after last
4545 // argument passed via stack.
4546 int FrameIndex = StoreByValRegs(
4547 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4548 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4549 AFI->setVarArgsFrameIndex(FrameIndex);
4550}
4551
4552bool ARMTargetLowering::splitValueIntoRegisterParts(
4553 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4554 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4555 EVT ValueVT = Val.getValueType();
4556 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4557 unsigned ValueBits = ValueVT.getSizeInBits();
4558 unsigned PartBits = PartVT.getSizeInBits();
4559 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4560 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4561 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4562 Parts[0] = Val;
4563 return true;
4564 }
4565 return false;
4566}
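// e.g. when an f16 value is passed or returned in a single-precision register
// under the hard-float ABI, it is bitcast to i16, any-extended to i32 and
// bitcast to f32 here, so the half occupies the low 16 bits of the 32-bit
// register; joinRegisterPartsIntoValue below undoes the same steps.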
4567
4568SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4569 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4570 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4571 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4572 unsigned ValueBits = ValueVT.getSizeInBits();
4573 unsigned PartBits = PartVT.getSizeInBits();
4574 SDValue Val = Parts[0];
4575
4576 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4577 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4578 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4579 return Val;
4580 }
4581 return SDValue();
4582}
4583
4584SDValue ARMTargetLowering::LowerFormalArguments(
4585 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4586 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4587 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4588 MachineFunction &MF = DAG.getMachineFunction();
4589 MachineFrameInfo &MFI = MF.getFrameInfo();
4590
4591 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4592
4593 // Assign locations to all of the incoming arguments.
4594 SmallVector<CCValAssign, 16> ArgLocs;
4595 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4596 *DAG.getContext());
4597 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4598
4599 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4600 unsigned CurArgIdx = 0;
4601
4602 // Initially ArgRegsSaveSize is zero.
4603 // Then we increase this value each time we meet byval parameter.
4604 // We also increase this value in case of varargs function.
4605 AFI->setArgRegsSaveSize(0);
4606
4607 // Calculate the amount of stack space that we need to allocate to store
4608 // byval and variadic arguments that are passed in registers.
4609 // We need to know this before we allocate the first byval or variadic
4610 // argument, as they will be allocated a stack slot below the CFA (Canonical
4611 // Frame Address, the stack pointer at entry to the function).
4612 unsigned ArgRegBegin = ARM::R4;
4613 for (const CCValAssign &VA : ArgLocs) {
4614 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4615 break;
4616
4617 unsigned Index = VA.getValNo();
4618 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4619 if (!Flags.isByVal())
4620 continue;
4621
4622 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4623 unsigned RBegin, REnd;
4624 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4625 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4626
4627 CCInfo.nextInRegsParam();
4628 }
4629 CCInfo.rewindByValRegsInfo();
4630
4631 int lastInsIndex = -1;
4632 if (isVarArg && MFI.hasVAStart()) {
4633 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4634 if (RegIdx != std::size(GPRArgRegs))
4635 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4636 }
4637
4638 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4639 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4640 auto PtrVT = getPointerTy(DAG.getDataLayout());
4641
4642 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4643 CCValAssign &VA = ArgLocs[i];
4644 if (Ins[VA.getValNo()].isOrigArg()) {
4645 std::advance(CurOrigArg,
4646 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4647 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4648 }
4649 // Arguments stored in registers.
4650 if (VA.isRegLoc()) {
4651 EVT RegVT = VA.getLocVT();
4652 SDValue ArgValue;
4653
4654 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4655 // f64 and vector types are split up into multiple registers or
4656 // combinations of registers and stack slots.
4657 SDValue ArgValue1 =
4658 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4659 VA = ArgLocs[++i]; // skip ahead to next loc
4660 SDValue ArgValue2;
4661 if (VA.isMemLoc()) {
4662 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4663 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4664 ArgValue2 = DAG.getLoad(
4665 MVT::f64, dl, Chain, FIN,
4666 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4667 } else {
4668 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4669 }
4670 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4671 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4672 ArgValue1, DAG.getIntPtrConstant(0, dl));
4673 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4674 ArgValue2, DAG.getIntPtrConstant(1, dl));
4675 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4676 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4677 } else {
4678 const TargetRegisterClass *RC;
4679
4680 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4681 RC = &ARM::HPRRegClass;
4682 else if (RegVT == MVT::f32)
4683 RC = &ARM::SPRRegClass;
4684 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4685 RegVT == MVT::v4bf16)
4686 RC = &ARM::DPRRegClass;
4687 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4688 RegVT == MVT::v8bf16)
4689 RC = &ARM::QPRRegClass;
4690 else if (RegVT == MVT::i32)
4691 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4692 : &ARM::GPRRegClass;
4693 else
4694 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4695
4696 // Transform the arguments in physical registers into virtual ones.
4697 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4698 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4699
4700 // If this value is passed in r0 and has the returned attribute (e.g.
4701 // C++ 'structors), record this fact for later use.
4702 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4703 AFI->setPreservesR0();
4704 }
4705 }
4706
4707 // If this is an 8 or 16-bit value, it is really passed promoted
4708 // to 32 bits. Insert an assert[sz]ext to capture this, then
4709 // truncate to the right size.
4710 switch (VA.getLocInfo()) {
4711 default: llvm_unreachable("Unknown loc info!");
4712 case CCValAssign::Full: break;
4713 case CCValAssign::BCvt:
4714 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4715 break;
4716 }
4717
4718 // f16 arguments have their size extended to 4 bytes and passed as if they
4719 // had been copied to the LSBs of a 32-bit register.
4720 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4721 if (VA.needsCustom() &&
4722 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4723 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4724
4725 // On CMSE Entry Functions, formal integer arguments whose bitwidth is
4726 // less than 32 bits must be sign- or zero-extended in the callee for
4727 // security reasons. Although the ABI mandates an extension done by the
4728 // caller, the latter cannot be trusted to follow the rules of the ABI.
4729 const ISD::InputArg &Arg = Ins[VA.getValNo()];
4730 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4731 RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
4732 ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);
4733
4734 InVals.push_back(ArgValue);
4735 } else { // VA.isRegLoc()
4736 // Only arguments passed on the stack should make it here.
4737 assert(VA.isMemLoc());
4738 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4739
4740 int index = VA.getValNo();
4741
4742 // Some Ins[] entries become multiple ArgLoc[] entries.
4743 // Process them only once.
4744 if (index != lastInsIndex)
4745 {
4746 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4747 // FIXME: For now, all byval parameter objects are marked mutable.
4748 // This can be changed with more analysis.
4749 // In case of tail call optimization mark all arguments mutable.
4750 // Since they could be overwritten by lowering of arguments in case of
4751 // a tail call.
4752 if (Flags.isByVal()) {
4753 assert(Ins[index].isOrigArg() &&
4754 "Byval arguments cannot be implicit");
4755 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4756
4757 int FrameIndex = StoreByValRegs(
4758 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4759 VA.getLocMemOffset(), Flags.getByValSize());
4760 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4761 CCInfo.nextInRegsParam();
4762 } else {
4763 unsigned FIOffset = VA.getLocMemOffset();
4764 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4765 FIOffset, true);
4766
4767 // Create load nodes to retrieve arguments from the stack.
4768 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4769 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4770 MachinePointerInfo::getFixedStack(
4771 DAG.getMachineFunction(), FI)));
4772 }
4773 lastInsIndex = index;
4774 }
4775 }
4776 }
4777
4778 // varargs
4779 if (isVarArg && MFI.hasVAStart()) {
4780 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4781 TotalArgRegsSaveSize);
4782 if (AFI->isCmseNSEntryFunction()) {
4783 DiagnosticInfoUnsupported Diag(
4784 DAG.getMachineFunction().getFunction(),
4785 "secure entry function must not be variadic", dl.getDebugLoc());
4786 DAG.getContext()->diagnose(Diag);
4787 }
4788 }
4789
4790 unsigned StackArgSize = CCInfo.getStackSize();
4791 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4792 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4793 // The only way to guarantee a tail call is if the callee restores its
4794 // argument area, but it must also keep the stack aligned when doing so.
4795 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
4796 assert(StackAlign && "data layout string is missing stack alignment");
4797 StackArgSize = alignTo(StackArgSize, *StackAlign);
4798
4799 AFI->setArgumentStackToRestore(StackArgSize);
4800 }
4801 AFI->setArgumentStackSize(StackArgSize);
4802
4803 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4804 DiagnosticInfoUnsupported Diag(
4805 DAG.getMachineFunction().getFunction(),
4806 "secure entry function requires arguments on stack", dl.getDebugLoc());
4807 DAG.getContext()->diagnose(Diag);
4808 }
4809
4810 return Chain;
4811}
4812
4813/// isFloatingPointZero - Return true if this is +0.0.
4814static bool isFloatingPointZero(SDValue Op) {
4815 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4816 return CFP->getValueAPF().isPosZero();
4817 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4818 // Maybe this has already been legalized into the constant pool?
4819 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4820 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4821 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4822 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4823 return CFP->getValueAPF().isPosZero();
4824 }
4825 } else if (Op->getOpcode() == ISD::BITCAST &&
4826 Op->getValueType(0) == MVT::f64) {
4827 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4828 // created by LowerConstantFP().
4829 SDValue BitcastOp = Op->getOperand(0);
4830 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4831 isNullConstant(BitcastOp->getOperand(0)))
4832 return true;
4833 }
4834 return false;
4835}
4836
4837/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4838/// the given operands.
4839SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4840 SDValue &ARMcc, SelectionDAG &DAG,
4841 const SDLoc &dl) const {
4842 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4843 unsigned C = RHSC->getZExtValue();
4844 if (!isLegalICmpImmediate((int32_t)C)) {
4845 // Constant does not fit, try adjusting it by one.
4846 switch (CC) {
4847 default: break;
4848 case ISD::SETLT:
4849 case ISD::SETGE:
4850 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4851 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4852 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4853 }
4854 break;
4855 case ISD::SETULT:
4856 case ISD::SETUGE:
4857 if (C != 0 && isLegalICmpImmediate(C-1)) {
4858 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4859 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4860 }
4861 break;
4862 case ISD::SETLE:
4863 case ISD::SETGT:
4864 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4865 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4866 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4867 }
4868 break;
4869 case ISD::SETULE:
4870 case ISD::SETUGT:
4871 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4872 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4873 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4874 }
4875 break;
4876 }
4877 }
4878 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4879 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4880 // In ARM and Thumb-2, the compare instructions can shift their second
4881 // operand.
4882 CC = ISD::getSetCCSwappedOperands(CC);
4883 std::swap(LHS, RHS);
4884 }
4885
4886 // Thumb1 has very limited immediate modes, so turning an "and" into a
4887 // shift can save multiple instructions.
4888 //
4889 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4890 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4891 // own. If it's the operand to an unsigned comparison with an immediate,
4892 // we can eliminate one of the shifts: we transform
4893 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4894 //
4895 // We avoid transforming cases which aren't profitable due to encoding
4896 // details:
4897 //
4898 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4899 // would not; in that case, we're essentially trading one immediate load for
4900 // another.
4901 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4902 // 3. C2 is zero; we have other code for this special case.
4903 //
4904 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4905 // instruction, since the AND is always one instruction anyway, but we could
4906 // use narrow instructions in some cases.
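 // For example (Thumb1): "(x & 0x3FF) == 0x120" has C1 = 0x3FF (a mask) and
 // C2 = 0x120 > 255, so the code below rewrites it to
 // "(x << 22) == (0x120 << 22)", replacing the and-with-a-materialized-mask
 // by a single shift.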
4907 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4908 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4909 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4910 !isSignedIntSetCC(CC)) {
4911 unsigned Mask = LHS.getConstantOperandVal(1);
4912 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4913 uint64_t RHSV = RHSC->getZExtValue();
4914 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4915 unsigned ShiftBits = llvm::countl_zero(Mask);
4916 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4917 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4918 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4919 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4920 }
4921 }
4922 }
4923
4924 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4925 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4926 // way a cmp would.
4927 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4928 // some tweaks to the heuristics for the previous and->shift transform.
4929 // FIXME: Optimize cases where the LHS isn't a shift.
4930 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4931 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4932 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4933 LHS.getConstantOperandVal(1) < 31) {
4934 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4935 SDValue Shift =
4936 DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
4937 LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
4938 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4939 return Shift.getValue(1);
4940 }
4941
4942 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4943
4944 // If the RHS is a constant zero then the V (overflow) flag will never be
4945 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4946 // simpler for other passes (like the peephole optimiser) to deal with.
4947 if (isNullConstant(RHS)) {
4948 switch (CondCode) {
4949 default: break;
4950 case ARMCC::GE:
4951 CondCode = ARMCC::PL;
4952 break;
4953 case ARMCC::LT:
4954 CondCode = ARMCC::MI;
4955 break;
4956 }
4957 }
4958
4959 ARMISD::NodeType CompareType;
4960 switch (CondCode) {
4961 default:
4962 CompareType = ARMISD::CMP;
4963 break;
4964 case ARMCC::EQ:
4965 case ARMCC::NE:
4966 // Uses only Z Flag
4967 CompareType = ARMISD::CMPZ;
4968 break;
4969 }
4970 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4971 return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
4972}
4973
4974/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4975SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4976 SelectionDAG &DAG, const SDLoc &dl,
4977 bool Signaling) const {
4978 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4979 SDValue Flags;
4980 if (!isFloatingPointZero(RHS))
4981 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
4982 LHS, RHS);
4983 else
4984 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
4985 FlagsVT, LHS);
4986 return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
4987}
4988
4989// This function returns three things: the arithmetic computation itself
4990// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4991// comparison and the condition code define the case in which the arithmetic
4992// computation *does not* overflow.
4993std::pair<SDValue, SDValue>
4994ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4995 SDValue &ARMcc) const {
4996 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4997
4998 SDValue Value, OverflowCmp;
4999 SDValue LHS = Op.getOperand(0);
5000 SDValue RHS = Op.getOperand(1);
5001 SDLoc dl(Op);
5002
5003 // FIXME: We are currently always generating CMPs because we don't support
5004 // generating CMN through the backend. This is not as good as the natural
5005 // CMP case because it causes a register dependency and cannot be folded
5006 // later.
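 // Note: comparing the result against one of the original operands recovers
 // the flag we need in the cases below. For an unsigned add, sum >=u LHS
 // exactly when no carry occurred (hence HS). For a signed add, the V flag of
 // (sum - LHS) equals the V flag of the original addition, so VC signals
 // "no overflow".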
5007
5008 switch (Op.getOpcode()) {
5009 default:
5010 llvm_unreachable("Unknown overflow instruction!");
5011 case ISD::SADDO:
5012 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
5013 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
5014 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
5015 break;
5016 case ISD::UADDO:
5017 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
5018 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
5019 // We do not use it in the USUBO case as Value may not be used.
5020 Value = DAG.getNode(ARMISD::ADDC, dl,
5021 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
5022 .getValue(0);
5023 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
5024 break;
5025 case ISD::SSUBO:
5026 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
5027 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
5028 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
5029 break;
5030 case ISD::USUBO:
5031 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
5032 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
5033 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
5034 break;
5035 case ISD::UMULO:
5036 // We generate a UMUL_LOHI and then check if the high word is 0.
5037 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
5038 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
5039 DAG.getVTList(Op.getValueType(), Op.getValueType()),
5040 LHS, RHS);
5041 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
5042 DAG.getConstant(0, dl, MVT::i32));
5043 Value = Value.getValue(0); // We only want the low 32 bits for the result.
5044 break;
5045 case ISD::SMULO:
5046 // We generate a SMUL_LOHI and then check if all the bits of the high word
5047 // are the same as the sign bit of the low word.
5048 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
5049 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
5050 DAG.getVTList(Op.getValueType(), Op.getValueType()),
5051 LHS, RHS);
5052 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
5053 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
5054 Value.getValue(0),
5055 DAG.getConstant(31, dl, MVT::i32)));
5056 Value = Value.getValue(0); // We only want the low 32 bits for the result.
5057 break;
5058 } // switch (...)
5059
5060 return std::make_pair(Value, OverflowCmp);
5061}
5062
5063SDValue
5064ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
5065 // Let legalize expand this if it isn't a legal type yet.
5066 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5067 return SDValue();
5068
5069 SDValue Value, OverflowCmp;
5070 SDValue ARMcc;
5071 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
5072 SDLoc dl(Op);
5073 // We use 0 and 1 as false and true values.
5074 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
5075 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
5076 EVT VT = Op.getValueType();
5077
5078 SDValue Overflow =
5079 DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
5080
5081 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
5082 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5083}
5084
5085static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
5086 SelectionDAG &DAG) {
5087 SDLoc DL(BoolCarry);
5088 EVT CarryVT = BoolCarry.getValueType();
5089
5090 // This converts the boolean value carry into the carry flag by doing
5091 // ARMISD::SUBC Carry, 1
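 // (Carry - 1 borrows exactly when Carry == 0, and ARM sets the C flag to the
 // inverse of borrow, so the resulting C flag equals the boolean carry.)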
5092 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
5093 DAG.getVTList(CarryVT, MVT::i32),
5094 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5095 return Carry.getValue(1);
5096}
5097
5098static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
5099 SelectionDAG &DAG) {
5100 SDLoc DL(Flags);
5101
5102 // Now convert the carry flag into a boolean carry. We do this
5103 // using ARMISD:ADDE 0, 0, Carry
5104 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5105 DAG.getConstant(0, DL, MVT::i32),
5106 DAG.getConstant(0, DL, MVT::i32), Flags);
5107}
5108
5109SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5110 SelectionDAG &DAG) const {
5111 // Let legalize expand this if it isn't a legal type yet.
5112 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5113 return SDValue();
5114
5115 SDValue LHS = Op.getOperand(0);
5116 SDValue RHS = Op.getOperand(1);
5117 SDLoc dl(Op);
5118
5119 EVT VT = Op.getValueType();
5120 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5121 SDValue Value;
5122 SDValue Overflow;
5123 switch (Op.getOpcode()) {
5124 default:
5125 llvm_unreachable("Unknown overflow instruction!");
5126 case ISD::UADDO:
5127 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5128 // Convert the carry flag into a boolean value.
5129 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5130 break;
5131 case ISD::USUBO: {
5132 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5133 // Convert the carry flag into a boolean value.
5134 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5135 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
5136 // value. So compute 1 - C.
5137 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5138 DAG.getConstant(1, dl, MVT::i32), Overflow);
5139 break;
5140 }
5141 }
5142
5143 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5144}
5145
5146static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
5147 const ARMSubtarget *Subtarget) {
5148 EVT VT = Op.getValueType();
5149 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5150 return SDValue();
5151 if (!VT.isSimple())
5152 return SDValue();
5153
5154 unsigned NewOpcode;
5155 switch (VT.getSimpleVT().SimpleTy) {
5156 default:
5157 return SDValue();
5158 case MVT::i8:
5159 switch (Op->getOpcode()) {
5160 case ISD::UADDSAT:
5161 NewOpcode = ARMISD::UQADD8b;
5162 break;
5163 case ISD::SADDSAT:
5164 NewOpcode = ARMISD::QADD8b;
5165 break;
5166 case ISD::USUBSAT:
5167 NewOpcode = ARMISD::UQSUB8b;
5168 break;
5169 case ISD::SSUBSAT:
5170 NewOpcode = ARMISD::QSUB8b;
5171 break;
5172 }
5173 break;
5174 case MVT::i16:
5175 switch (Op->getOpcode()) {
5176 case ISD::UADDSAT:
5177 NewOpcode = ARMISD::UQADD16b;
5178 break;
5179 case ISD::SADDSAT:
5180 NewOpcode = ARMISD::QADD16b;
5181 break;
5182 case ISD::USUBSAT:
5183 NewOpcode = ARMISD::UQSUB16b;
5184 break;
5185 case ISD::SSUBSAT:
5186 NewOpcode = ARMISD::QSUB16b;
5187 break;
5188 }
5189 break;
5190 }
5191
5192 SDLoc dl(Op);
5193 SDValue Add =
5194 DAG.getNode(NewOpcode, dl, MVT::i32,
5195 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5196 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5197 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5198}
5199
5200SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5201 SDValue Cond = Op.getOperand(0);
5202 SDValue SelectTrue = Op.getOperand(1);
5203 SDValue SelectFalse = Op.getOperand(2);
5204 SDLoc dl(Op);
5205 unsigned Opc = Cond.getOpcode();
5206
5207 if (Cond.getResNo() == 1 &&
5208 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5209 Opc == ISD::USUBO)) {
5210 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5211 return SDValue();
5212
5213 SDValue Value, OverflowCmp;
5214 SDValue ARMcc;
5215 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5216 EVT VT = Op.getValueType();
5217
5218 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
5219 }
5220
5221 // Convert:
5222 //
5223 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5224 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5225 //
5226 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5227 const ConstantSDNode *CMOVTrue =
5228 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5229 const ConstantSDNode *CMOVFalse =
5230 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5231
5232 if (CMOVTrue && CMOVFalse) {
5233 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5234 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5235
5236 SDValue True;
5237 SDValue False;
5238 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5239 True = SelectTrue;
5240 False = SelectFalse;
5241 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5242 True = SelectFalse;
5243 False = SelectTrue;
5244 }
5245
5246 if (True.getNode() && False.getNode())
5247 return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
5248 Cond.getOperand(3), DAG);
5249 }
5250 }
5251
5252 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5253 // undefined bits before doing a full-word comparison with zero.
5254 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5255 DAG.getConstant(1, dl, Cond.getValueType()));
5256
5257 return DAG.getSelectCC(dl, Cond,
5258 DAG.getConstant(0, dl, Cond.getValueType()),
5259 SelectTrue, SelectFalse, ISD::SETNE);
5260}
5261
5262static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
5263 bool &swpCmpOps, bool &swpVselOps) {
5264 // Start by selecting the GE condition code for opcodes that return true for
5265 // 'equality'
5266 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5267 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5268 CondCode = ARMCC::GE;
5269
5270 // and GT for opcodes that return false for 'equality'.
5271 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5272 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5273 CondCode = ARMCC::GT;
5274
5275 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5276 // to swap the compare operands.
5277 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5278 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5279 swpCmpOps = true;
5280
5281 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5282 // If we have an unordered opcode, we need to swap the operands to the VSEL
5283 // instruction (effectively negating the condition).
5284 //
5285 // This also has the effect of swapping which one of 'less' or 'greater'
5286 // returns true, so we also swap the compare operands. It also switches
5287 // whether we return true for 'equality', so we compensate by picking the
5288 // opposite condition code to our original choice.
5289 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5290 CC == ISD::SETUGT) {
5291 swpCmpOps = !swpCmpOps;
5292 swpVselOps = !swpVselOps;
5293 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5294 }
5295
5296 // 'ordered' is 'anything but unordered', so use the VS condition code and
5297 // swap the VSEL operands.
5298 if (CC == ISD::SETO) {
5299 CondCode = ARMCC::VS;
5300 swpVselOps = true;
5301 }
5302
5303 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5304 // code and swap the VSEL operands. Also do this if we don't care about the
5305 // unordered case.
5306 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5307 CondCode = ARMCC::EQ;
5308 swpVselOps = true;
5309 }
5310}
5311
5312SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5313 SDValue TrueVal, SDValue ARMcc,
5314 SDValue Flags, SelectionDAG &DAG) const {
5315 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5316 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5317 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5318 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5319 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5320
5321 SDValue TrueLow = TrueVal.getValue(0);
5322 SDValue TrueHigh = TrueVal.getValue(1);
5323 SDValue FalseLow = FalseVal.getValue(0);
5324 SDValue FalseHigh = FalseVal.getValue(1);
5325
5326 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5327 ARMcc, Flags);
5328 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5329 ARMcc, Flags);
5330
5331 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5332 }
5333 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
5334}
5335
5336 static bool isGTorGE(ISD::CondCode CC) {
5337 return CC == ISD::SETGT || CC == ISD::SETGE;
5338}
5339
5340 static bool isLTorLE(ISD::CondCode CC) {
5341 return CC == ISD::SETLT || CC == ISD::SETLE;
5342}
5343
5344// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5345// All of these conditions (and their <= and >= counterparts) will do:
5346// x < k ? k : x
5347// x > k ? x : k
5348// k < x ? x : k
5349// k > x ? k : x
5350static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5351 const SDValue TrueVal, const SDValue FalseVal,
5352 const ISD::CondCode CC, const SDValue K) {
5353 return (isGTorGE(CC) &&
5354 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5355 (isLTorLE(CC) &&
5356 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5357}
5358
5359// Check if two chained conditionals could be converted into SSAT or USAT.
5360//
5361// SSAT can replace a set of two conditional selectors that bound a number to an
5362 // interval of type [~k, k] when k + 1 is a power of 2. Here are some examples:
5363//
5364// x < -k ? -k : (x > k ? k : x)
5365// x < -k ? -k : (x < k ? x : k)
5366// x > -k ? (x > k ? k : x) : -k
5367// x < k ? (x < -k ? -k : x) : k
5368// etc.
5369//
5370// LLVM canonicalizes these to either a min(max()) or a max(min())
5371// pattern. This function tries to match one of these and will return a SSAT
5372// node if successful.
5373//
5374 // USAT works similarly to SSAT but bounds the value to the interval [0, k], where k + 1
5375// is a power of 2.
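// Worked example (illustrative only): for k = 127 (k + 1 = 128 = 2^7), the
// matched pattern clamps x to [-128, 127]; PosVal = 127 and NegVal = -128 = ~127,
// so the SSAT node below is built with immediate countr_one(127) = 7. The
// unsigned variant, with NegVal == 0 and PosVal = 127, clamps to [0, 127] and
// becomes a USAT node with the same immediate.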
5376 static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5377 EVT VT = Op.getValueType();
5378 SDValue V1 = Op.getOperand(0);
5379 SDValue K1 = Op.getOperand(1);
5380 SDValue TrueVal1 = Op.getOperand(2);
5381 SDValue FalseVal1 = Op.getOperand(3);
5382 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5383
5384 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5385 if (Op2.getOpcode() != ISD::SELECT_CC)
5386 return SDValue();
5387
5388 SDValue V2 = Op2.getOperand(0);
5389 SDValue K2 = Op2.getOperand(1);
5390 SDValue TrueVal2 = Op2.getOperand(2);
5391 SDValue FalseVal2 = Op2.getOperand(3);
5392 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5393
5394 SDValue V1Tmp = V1;
5395 SDValue V2Tmp = V2;
5396
5397 // Check that the registers and the constants match a max(min()) or min(max())
5398 // pattern
5399 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5400 K2 != FalseVal2 ||
5401 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5402 return SDValue();
5403
5404 // Check that the constant in the lower-bound check is
5405 // the opposite of the constant in the upper-bound check
5406 // in 1's complement.
5407 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5408 return SDValue();
5409
5410 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5411 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5412 int64_t PosVal = std::max(Val1, Val2);
5413 int64_t NegVal = std::min(Val1, Val2);
5414
5415 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5416 !isPowerOf2_64(PosVal + 1))
5417 return SDValue();
5418
5419 // Handle the difference between USAT (unsigned) and SSAT (signed)
5420 // saturation
5421 // At this point, PosVal is guaranteed to be positive
5422 uint64_t K = PosVal;
5423 SDLoc dl(Op);
5424 if (Val1 == ~Val2)
5425 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5426 DAG.getConstant(llvm::countr_one(K), dl, VT));
5427 if (NegVal == 0)
5428 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5429 DAG.getConstant(llvm::countr_one(K), dl, VT));
5430
5431 return SDValue();
5432}
5433
5434// Check if a condition of the type x < k ? k : x can be converted into a
5435// bit operation instead of conditional moves.
5436// Currently this is allowed given:
5437// - The conditions and values match up
5438// - k is 0 or -1 (all ones)
5439 // This function will not check the last condition; that's up to the caller.
5440 // It returns true if the transformation can be made, and in that case
5441 // returns x in V, and k in SatK.
5442 static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5443 SDValue &SatK)
5444 {
5445 SDValue LHS = Op.getOperand(0);
5446 SDValue RHS = Op.getOperand(1);
5447 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5448 SDValue TrueVal = Op.getOperand(2);
5449 SDValue FalseVal = Op.getOperand(3);
5450
5451 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5452 ? &RHS
5453 : nullptr;
5454
5455 // No constant operand in the comparison, early out
5456 if (!K)
5457 return false;
5458
5459 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5460 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5461 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5462
5463 // If the constant on the left and right sides, or the variable on the left
5464 // and right, does not match, early out
5465 if (*K != KTmp || V != VTmp)
5466 return false;
5467
5468 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5469 SatK = *K;
5470 return true;
5471 }
5472
5473 return false;
5474}
5475
5476bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5477 if (VT == MVT::f32)
5478 return !Subtarget->hasVFP2Base();
5479 if (VT == MVT::f64)
5480 return !Subtarget->hasFP64();
5481 if (VT == MVT::f16)
5482 return !Subtarget->hasFullFP16();
5483 return false;
5484}
5485
5486SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5487 EVT VT = Op.getValueType();
5488 SDLoc dl(Op);
5489
5490 // Try to convert two saturating conditional selects into a single SSAT
5491 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5492 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5493 return SatValue;
5494
5495 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5496 // into more efficient bit operations, which is possible when k is 0 or -1
5497 // On ARM and Thumb-2 which have flexible operand 2 this will result in
5498 // single instructions. On Thumb the shift and the bit operation will be two
5499 // instructions.
5500 // Only allow this transformation on full-width (32-bit) operations
5501 SDValue LowerSatConstant;
5502 SDValue SatValue;
5503 if (VT == MVT::i32 &&
5504 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5505 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5506 DAG.getConstant(31, dl, VT));
5507 if (isNullConstant(LowerSatConstant)) {
5508 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5509 DAG.getAllOnesConstant(dl, VT));
5510 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5511 } else if (isAllOnesConstant(LowerSatConstant))
5512 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5513 }
5514
5515 SDValue LHS = Op.getOperand(0);
5516 SDValue RHS = Op.getOperand(1);
5517 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5518 SDValue TrueVal = Op.getOperand(2);
5519 SDValue FalseVal = Op.getOperand(3);
5520 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5521 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5522
5523 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5524 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5525 unsigned TVal = CTVal->getZExtValue();
5526 unsigned FVal = CFVal->getZExtValue();
5527 unsigned Opcode = 0;
5528
5529 if (TVal == ~FVal) {
5530 Opcode = ARMISD::CSINV;
5531 } else if (TVal == ~FVal + 1) {
5532 Opcode = ARMISD::CSNEG;
5533 } else if (TVal + 1 == FVal) {
5534 Opcode = ARMISD::CSINC;
5535 } else if (TVal == FVal + 1) {
5536 Opcode = ARMISD::CSINC;
5537 std::swap(TrueVal, FalseVal);
5538 std::swap(TVal, FVal);
5539 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5540 }
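// For instance (illustrative only), select(c, 5, 6) has TVal + 1 == FVal, so it
// maps to CSINC: materialise 5 once and produce 5 when the condition holds and
// 5 + 1 = 6 otherwise. Constant pairs with FVal == ~TVal map to CSINV in the
// same way, and pairs with FVal == -TVal map to CSNEG.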
5541
5542 if (Opcode) {
5543 // If one of the constants is cheaper than another, materialise the
5544 // cheaper one and let the csel generate the other.
5545 if (Opcode != ARMISD::CSINC &&
5546 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5547 std::swap(TrueVal, FalseVal);
5548 std::swap(TVal, FVal);
5549 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5550 }
5551
5552 // Attempt to use ZR, checking that TVal is 0, possibly inverting the condition
5553 // to get there. CSINC is not invertible like the other two (~(~a) == a,
5554 // -(-a) == a, but (a+1)+1 != a).
5555 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5556 std::swap(TrueVal, FalseVal);
5557 std::swap(TVal, FVal);
5558 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5559 }
5560
5561 // Drops F's value because we can get it by inverting/negating TVal.
5562 FalseVal = TrueVal;
5563
5564 SDValue ARMcc;
5565 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5566 EVT VT = TrueVal.getValueType();
5567 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5568 }
5569 }
5570
5571 if (isUnsupportedFloatingType(LHS.getValueType())) {
5572 DAG.getTargetLoweringInfo().softenSetCCOperands(
5573 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5574
5575 // If softenSetCCOperands only returned one value, we should compare it to
5576 // zero.
5577 if (!RHS.getNode()) {
5578 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5579 CC = ISD::SETNE;
5580 }
5581 }
5582
5583 if (LHS.getValueType() == MVT::i32) {
5584 // Try to generate VSEL on ARMv8.
5585 // The VSEL instruction can't use all the usual ARM condition
5586 // codes: it only has two bits to select the condition code, so it's
5587 // constrained to use only GE, GT, VS and EQ.
5588 //
5589 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5590 // swap the operands of the previous compare instruction (effectively
5591 // inverting the compare condition, swapping 'less' and 'greater') and
5592 // sometimes need to swap the operands to the VSEL (which inverts the
5593 // condition in the sense of firing whenever the previous condition didn't)
5594 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5595 TrueVal.getValueType() == MVT::f32 ||
5596 TrueVal.getValueType() == MVT::f64)) {
5597 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5598 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5599 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5600 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5601 std::swap(TrueVal, FalseVal);
5602 }
5603 }
5604
5605 SDValue ARMcc;
5606 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5607 // Choose GE over PL, which vsel does not support
5608 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5609 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5610 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5611 }
5612
5613 ARMCC::CondCodes CondCode, CondCode2;
5614 FPCCToARMCC(CC, CondCode, CondCode2);
5615
5616 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5617 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5618 // must use VSEL (limited condition codes), due to not having conditional f16
5619 // moves.
5620 if (Subtarget->hasFPARMv8Base() &&
5621 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5622 (TrueVal.getValueType() == MVT::f16 ||
5623 TrueVal.getValueType() == MVT::f32 ||
5624 TrueVal.getValueType() == MVT::f64)) {
5625 bool swpCmpOps = false;
5626 bool swpVselOps = false;
5627 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5628
5629 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5630 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5631 if (swpCmpOps)
5632 std::swap(LHS, RHS);
5633 if (swpVselOps)
5634 std::swap(TrueVal, FalseVal);
5635 }
5636 }
5637
5638 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5639 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5640 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5641 if (CondCode2 != ARMCC::AL) {
5642 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5643 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
5644 }
5645 return Result;
5646}
5647
5648/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5649/// to morph to an integer compare sequence.
5650static bool canChangeToInt(SDValue Op, bool &SeenZero,
5651 const ARMSubtarget *Subtarget) {
5652 SDNode *N = Op.getNode();
5653 if (!N->hasOneUse())
5654 // Otherwise it requires moving the value from fp to integer registers.
5655 return false;
5656 if (!N->getNumValues())
5657 return false;
5658 EVT VT = Op.getValueType();
5659 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5660 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5661 // vmrs are very slow, e.g. cortex-a8.
5662 return false;
5663
5664 if (isFloatingPointZero(Op)) {
5665 SeenZero = true;
5666 return true;
5667 }
5668 return ISD::isNormalLoad(N);
5669}
5670
5671 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5672 if (isFloatingPointZero(Op))
5673 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5674
5675 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5676 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5677 Ld->getPointerInfo(), Ld->getAlign(),
5678 Ld->getMemOperand()->getFlags());
5679
5680 llvm_unreachable("Unknown VFP cmp argument!");
5681}
5682
5683 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5684 SDValue &RetVal1, SDValue &RetVal2) {
5685 SDLoc dl(Op);
5686
5687 if (isFloatingPointZero(Op)) {
5688 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5689 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5690 return;
5691 }
5692
5693 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5694 SDValue Ptr = Ld->getBasePtr();
5695 RetVal1 =
5696 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5697 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5698
5699 EVT PtrType = Ptr.getValueType();
5700 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5701 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5702 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5703 Ld->getPointerInfo().getWithOffset(4),
5704 commonAlignment(Ld->getAlign(), 4),
5705 Ld->getMemOperand()->getFlags());
5706 return;
5707 }
5708
5709 llvm_unreachable("Unknown VFP cmp argument!");
5710}
5711
5712/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5713/// f32 and even f64 comparisons to integer ones.
5714SDValue
5715ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5716 SDValue Chain = Op.getOperand(0);
5717 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5718 SDValue LHS = Op.getOperand(2);
5719 SDValue RHS = Op.getOperand(3);
5720 SDValue Dest = Op.getOperand(4);
5721 SDLoc dl(Op);
5722
5723 bool LHSSeenZero = false;
5724 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5725 bool RHSSeenZero = false;
5726 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5727 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5728 // If unsafe fp math optimization is enabled and there are no other uses of
5729 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5730 // to an integer comparison.
5731 if (CC == ISD::SETOEQ)
5732 CC = ISD::SETEQ;
5733 else if (CC == ISD::SETUNE)
5734 CC = ISD::SETNE;
5735
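// Note: masking with 0x7fffffff below clears the IEEE sign bit of both
// operands before the integer compare, so that +0.0 and -0.0 (which differ
// only in that bit) still compare equal to an all-zero bit pattern.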
5736 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5737 SDValue ARMcc;
5738 if (LHS.getValueType() == MVT::f32) {
5739 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5740 bitcastf32Toi32(LHS, DAG), Mask);
5741 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5742 bitcastf32Toi32(RHS, DAG), Mask);
5743 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5744 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5745 Cmp);
5746 }
5747
5748 SDValue LHS1, LHS2;
5749 SDValue RHS1, RHS2;
5750 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5751 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5752 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5753 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5754 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5755 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5756 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5757 return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
5758 }
5759
5760 return SDValue();
5761}
5762
5763SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5764 SDValue Chain = Op.getOperand(0);
5765 SDValue Cond = Op.getOperand(1);
5766 SDValue Dest = Op.getOperand(2);
5767 SDLoc dl(Op);
5768
5769 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5770 // instruction.
5771 unsigned Opc = Cond.getOpcode();
5772 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5773 !Subtarget->isThumb1Only();
5774 if (Cond.getResNo() == 1 &&
5775 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5776 Opc == ISD::USUBO || OptimizeMul)) {
5777 // Only lower legal XALUO ops.
5778 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5779 return SDValue();
5780
5781 // The actual operation with overflow check.
5782 SDValue Value, OverflowCmp;
5783 SDValue ARMcc;
5784 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5785
5786 // Reverse the condition code.
5787 ARMCC::CondCodes CondCode =
5788 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5789 CondCode = ARMCC::getOppositeCondition(CondCode);
5790 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5791
5792 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5793 OverflowCmp);
5794 }
5795
5796 return SDValue();
5797}
5798
5799SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5800 SDValue Chain = Op.getOperand(0);
5801 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5802 SDValue LHS = Op.getOperand(2);
5803 SDValue RHS = Op.getOperand(3);
5804 SDValue Dest = Op.getOperand(4);
5805 SDLoc dl(Op);
5806
5807 if (isUnsupportedFloatingType(LHS.getValueType())) {
5808 DAG.getTargetLoweringInfo().softenSetCCOperands(
5809 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5810
5811 // If softenSetCCOperands only returned one value, we should compare it to
5812 // zero.
5813 if (!RHS.getNode()) {
5814 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5815 CC = ISD::SETNE;
5816 }
5817 }
5818
5819 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5820 // instruction.
5821 unsigned Opc = LHS.getOpcode();
5822 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5823 !Subtarget->isThumb1Only();
5824 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5825 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5826 Opc == ISD::USUBO || OptimizeMul) &&
5827 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5828 // Only lower legal XALUO ops.
5829 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5830 return SDValue();
5831
5832 // The actual operation with overflow check.
5833 SDValue Value, OverflowCmp;
5834 SDValue ARMcc;
5835 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5836
5837 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5838 // Reverse the condition code.
5839 ARMCC::CondCodes CondCode =
5840 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5841 CondCode = ARMCC::getOppositeCondition(CondCode);
5842 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5843 }
5844
5845 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5846 OverflowCmp);
5847 }
5848
5849 if (LHS.getValueType() == MVT::i32) {
5850 SDValue ARMcc;
5851 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5852 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
5853 }
5854
5855 if (getTargetMachine().Options.UnsafeFPMath &&
5856 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5857 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5858 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5859 return Result;
5860 }
5861
5862 ARMCC::CondCodes CondCode, CondCode2;
5863 FPCCToARMCC(CC, CondCode, CondCode2);
5864
5865 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5866 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5867 SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
5868 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5869 if (CondCode2 != ARMCC::AL) {
5870 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5871 SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
5872 Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5873 }
5874 return Res;
5875}
5876
5877SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5878 SDValue Chain = Op.getOperand(0);
5879 SDValue Table = Op.getOperand(1);
5880 SDValue Index = Op.getOperand(2);
5881 SDLoc dl(Op);
5882
5883 EVT PTy = getPointerTy(DAG.getDataLayout());
5884 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5885 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5886 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5887 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5888 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5889 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5890 // Thumb2 and ARMv8-M use a two-level jump: the first jump lands in the jump
5891 // table, which then jumps to the destination. This also makes it easier
5892 // to translate it to TBB / TBH later (Thumb2 only).
5893 // FIXME: This might not work if the function is extremely large.
5894 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5895 Addr, Op.getOperand(2), JTI);
5896 }
5897 if (isPositionIndependent() || Subtarget->isROPI()) {
5898 Addr =
5899 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5900 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5901 Chain = Addr.getValue(1);
5902 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5903 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5904 } else {
5905 Addr =
5906 DAG.getLoad(PTy, dl, Chain, Addr,
5907 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5908 Chain = Addr.getValue(1);
5909 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5910 }
5911}
5912
5913 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5914 EVT VT = Op.getValueType();
5915 SDLoc dl(Op);
5916
5917 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5918 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5919 return Op;
5920 return DAG.UnrollVectorOp(Op.getNode());
5921 }
5922
5923 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5924
5925 EVT NewTy;
5926 const EVT OpTy = Op.getOperand(0).getValueType();
5927 if (OpTy == MVT::v4f32)
5928 NewTy = MVT::v4i32;
5929 else if (OpTy == MVT::v4f16 && HasFullFP16)
5930 NewTy = MVT::v4i16;
5931 else if (OpTy == MVT::v8f16 && HasFullFP16)
5932 NewTy = MVT::v8i16;
5933 else
5934 llvm_unreachable("Invalid type for custom lowering!");
5935
5936 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5937 return DAG.UnrollVectorOp(Op.getNode());
5938
5939 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5940 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5941}
5942
5943SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5944 EVT VT = Op.getValueType();
5945 if (VT.isVector())
5946 return LowerVectorFP_TO_INT(Op, DAG);
5947
5948 bool IsStrict = Op->isStrictFPOpcode();
5949 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5950
5951 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5952 RTLIB::Libcall LC;
5953 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5954 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5955 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5956 Op.getValueType());
5957 else
5958 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5959 Op.getValueType());
5960 SDLoc Loc(Op);
5961 MakeLibCallOptions CallOptions;
5962 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5963 SDValue Result;
5964 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5965 CallOptions, Loc, Chain);
5966 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5967 }
5968
5969 // FIXME: Remove this when we have strict fp instruction selection patterns
5970 if (IsStrict) {
5971 SDLoc Loc(Op);
5972 SDValue Result =
5973 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5974 : ISD::FP_TO_UINT,
5975 Loc, Op.getValueType(), SrcVal);
5976 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5977 }
5978
5979 return Op;
5980}
5981
5982 static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5983 const ARMSubtarget *Subtarget) {
5984 EVT VT = Op.getValueType();
5985 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5986 EVT FromVT = Op.getOperand(0).getValueType();
5987
5988 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5989 return Op;
5990 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5991 Subtarget->hasFP64())
5992 return Op;
5993 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5994 Subtarget->hasFullFP16())
5995 return Op;
5996 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5997 Subtarget->hasMVEFloatOps())
5998 return Op;
5999 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
6000 Subtarget->hasMVEFloatOps())
6001 return Op;
6002
6003 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
6004 return SDValue();
6005
6006 SDLoc DL(Op);
6007 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
6008 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
6009 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
6010 DAG.getValueType(VT.getScalarType()));
6011 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
6012 DAG.getConstant((1 << BW) - 1, DL, VT));
6013 if (IsSigned)
6014 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
6015 DAG.getSignedConstant(-(1 << BW), DL, VT));
6016 return Max;
6017}
6018
6019 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
6020 EVT VT = Op.getValueType();
6021 SDLoc dl(Op);
6022
6023 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
6024 if (VT.getVectorElementType() == MVT::f32)
6025 return Op;
6026 return DAG.UnrollVectorOp(Op.getNode());
6027 }
6028
6029 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
6030 Op.getOperand(0).getValueType() == MVT::v8i16) &&
6031 "Invalid type for custom lowering!");
6032
6033 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
6034
6035 EVT DestVecType;
6036 if (VT == MVT::v4f32)
6037 DestVecType = MVT::v4i32;
6038 else if (VT == MVT::v4f16 && HasFullFP16)
6039 DestVecType = MVT::v4i16;
6040 else if (VT == MVT::v8f16 && HasFullFP16)
6041 DestVecType = MVT::v8i16;
6042 else
6043 return DAG.UnrollVectorOp(Op.getNode());
6044
6045 unsigned CastOpc;
6046 unsigned Opc;
6047 switch (Op.getOpcode()) {
6048 default: llvm_unreachable("Invalid opcode!");
6049 case ISD::SINT_TO_FP:
6050 CastOpc = ISD::SIGN_EXTEND;
6051 Opc = ISD::SINT_TO_FP;
6052 break;
6053 case ISD::UINT_TO_FP:
6054 CastOpc = ISD::ZERO_EXTEND;
6055 Opc = ISD::UINT_TO_FP;
6056 break;
6057 }
6058
6059 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
6060 return DAG.getNode(Opc, dl, VT, Op);
6061}
6062
6063SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
6064 EVT VT = Op.getValueType();
6065 if (VT.isVector())
6066 return LowerVectorINT_TO_FP(Op, DAG);
6067 if (isUnsupportedFloatingType(VT)) {
6068 RTLIB::Libcall LC;
6069 if (Op.getOpcode() == ISD::SINT_TO_FP)
6070 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
6071 Op.getValueType());
6072 else
6073 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6074 Op.getValueType());
6075 MakeLibCallOptions CallOptions;
6076 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6077 CallOptions, SDLoc(Op)).first;
6078 }
6079
6080 return Op;
6081}
6082
6083SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6084 // Implement fcopysign by combining operand 0's magnitude with operand 1's sign bit.
6085 SDValue Tmp0 = Op.getOperand(0);
6086 SDValue Tmp1 = Op.getOperand(1);
6087 SDLoc dl(Op);
6088 EVT VT = Op.getValueType();
6089 EVT SrcVT = Tmp1.getValueType();
6090 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6091 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6092 bool UseNEON = !InGPR && Subtarget->hasNEON();
6093
6094 if (UseNEON) {
6095 // Use VBSL to copy the sign bit.
6096 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6097 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6098 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6099 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6100 if (VT == MVT::f64)
6101 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6102 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6103 DAG.getConstant(32, dl, MVT::i32));
6104 else /*if (VT == MVT::f32)*/
6105 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6106 if (SrcVT == MVT::f32) {
6107 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6108 if (VT == MVT::f64)
6109 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6110 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6111 DAG.getConstant(32, dl, MVT::i32));
6112 } else if (VT == MVT::f32)
6113 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6114 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6115 DAG.getConstant(32, dl, MVT::i32));
6116 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6117 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6118
6119 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6120 dl, MVT::i32);
6121 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6122 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6123 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6124
6125 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6126 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6127 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6128 if (VT == MVT::f32) {
6129 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6130 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6131 DAG.getConstant(0, dl, MVT::i32));
6132 } else {
6133 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6134 }
6135
6136 return Res;
6137 }
6138
6139 // Bitcast operand 1 to i32.
6140 if (SrcVT == MVT::f64)
6141 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6142 Tmp1).getValue(1);
6143 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6144
6145 // Or in the signbit with integer operations.
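// In bit terms (i32 view of an f32): copysign(x, y) keeps the magnitude bits
// of x and the sign bit of y, i.e.
//   result = (bits(x) & 0x7fffffff) | (bits(y) & 0x80000000)
// which is exactly what the AND/OR sequence below computes.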
6146 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6147 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6148 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6149 if (VT == MVT::f32) {
6150 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6151 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6152 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6153 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6154 }
6155
6156 // f64: Or the high part with signbit and then combine two parts.
6157 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6158 Tmp0);
6159 SDValue Lo = Tmp0.getValue(0);
6160 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6161 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6162 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6163}
6164
6165SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6166 MachineFunction &MF = DAG.getMachineFunction();
6167 MachineFrameInfo &MFI = MF.getFrameInfo();
6168 MFI.setReturnAddressIsTaken(true);
6169
6170 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6171 return SDValue();
6172
6173 EVT VT = Op.getValueType();
6174 SDLoc dl(Op);
6175 unsigned Depth = Op.getConstantOperandVal(0);
6176 if (Depth) {
6177 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6178 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6179 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6180 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6181 MachinePointerInfo());
6182 }
6183
6184 // Return LR, which contains the return address. Mark it an implicit live-in.
6185 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6186 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6187}
6188
6189SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6190 const ARMBaseRegisterInfo &ARI =
6191 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6192 MachineFunction &MF = DAG.getMachineFunction();
6193 MachineFrameInfo &MFI = MF.getFrameInfo();
6194 MFI.setFrameAddressIsTaken(true);
6195
6196 EVT VT = Op.getValueType();
6197 SDLoc dl(Op); // FIXME probably not meaningful
6198 unsigned Depth = Op.getConstantOperandVal(0);
6199 Register FrameReg = ARI.getFrameRegister(MF);
6200 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6201 while (Depth--)
6202 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6203 MachinePointerInfo());
6204 return FrameAddr;
6205}
6206
6207// FIXME? Maybe this could be a TableGen attribute on some registers and
6208// this table could be generated automatically from RegInfo.
6209Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6210 const MachineFunction &MF) const {
6211 Register Reg = StringSwitch<unsigned>(RegName)
6212 .Case("sp", ARM::SP)
6213 .Default(0);
6214 if (Reg)
6215 return Reg;
6216 report_fatal_error(Twine("Invalid register name \""
6217 + StringRef(RegName) + "\"."));
6218}
6219
6220 // The result is a 64-bit value, so split it into two 32-bit values and
6221 // return them as a pair of values.
6222 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6223 SelectionDAG &DAG) {
6224 SDLoc DL(N);
6225
6226 // This function is only supposed to be called for i64 type destination.
6227 assert(N->getValueType(0) == MVT::i64
6228 && "ExpandREAD_REGISTER called for non-i64 type result.");
6229
6230 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6231 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6232 N->getOperand(0),
6233 N->getOperand(1));
6234
6235 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6236 Read.getValue(1)));
6237 Results.push_back(Read.getOperand(0));
6238}
6239
6240/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6241/// When \p DstVT, the destination type of \p BC, is on the vector
6242/// register bank and the source of bitcast, \p Op, operates on the same bank,
6243/// it might be possible to combine them, such that everything stays on the
6244/// vector register bank.
6245 /// \return The node that would replace \p BC, if the combine
6246/// is possible.
6247 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6248 SelectionDAG &DAG) {
6249 SDValue Op = BC->getOperand(0);
6250 EVT DstVT = BC->getValueType(0);
6251
6252 // The only vector instruction that can produce a scalar (remember,
6253 // since the bitcast was about to be turned into VMOVDRR, the source
6254 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6255 // Moreover, we can do this combine only if there is one use.
6256 // Finally, if the destination type is not a vector, there is not
6257 // much point in forcing everything onto the vector bank.
6258 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6259 !Op.hasOneUse())
6260 return SDValue();
6261
6262 // If the index is not constant, we will introduce an additional
6263 // multiply that will stick.
6264 // Give up in that case.
6265 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6266 if (!Index)
6267 return SDValue();
6268 unsigned DstNumElt = DstVT.getVectorNumElements();
6269
6270 // Compute the new index.
6271 const APInt &APIntIndex = Index->getAPIntValue();
6272 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6273 NewIndex *= APIntIndex;
6274 // Check if the new constant index fits into i32.
6275 if (NewIndex.getBitWidth() > 32)
6276 return SDValue();
6277
6278 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6279 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
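// Concrete instance (illustrative only): a v2f32 bitcast of
// (i64 extractelt v2i64 %src, 1) becomes
// (v2f32 extract_subvector (v4f32 bitcast %src), 2),
// i.e. DstNumElt = 2 and the new index is 1 * 2 = 2.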
6280 SDLoc dl(Op);
6281 SDValue ExtractSrc = Op.getOperand(0);
6282 EVT VecVT = EVT::getVectorVT(
6283 *DAG.getContext(), DstVT.getScalarType(),
6284 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6285 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6286 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6287 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6288}
6289
6290/// ExpandBITCAST - If the target supports VFP, this function is called to
6291/// expand a bit convert where either the source or destination type is i64 to
6292/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6293/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6294/// vectors), since the legalizer won't know what to do with that.
6295SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6296 const ARMSubtarget *Subtarget) const {
6297 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6298 SDLoc dl(N);
6299 SDValue Op = N->getOperand(0);
6300
6301 // This function is only supposed to be called for i16 and i64 types, either
6302 // as the source or destination of the bit convert.
6303 EVT SrcVT = Op.getValueType();
6304 EVT DstVT = N->getValueType(0);
6305
6306 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6307 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6308 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6309 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6310
6311 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6312 (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
6313 if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())
6314 Op = DAG.getBitcast(MVT::f16, Op);
6315 return DAG.getNode(
6316 ISD::TRUNCATE, SDLoc(N), DstVT,
6317 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6318 }
6319
6320 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6321 return SDValue();
6322
6323 // Turn i64->f64 into VMOVDRR.
6324 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6325 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6326 // if we can combine the bitcast with its source.
6327 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6328 return Val;
6329 SDValue Lo, Hi;
6330 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6331 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6332 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6333 }
6334
6335 // Turn f64->i64 into VMOVRRD.
6336 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6337 SDValue Cvt;
6338 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6339 SrcVT.getVectorNumElements() > 1)
6340 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6341 DAG.getVTList(MVT::i32, MVT::i32),
6342 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6343 else
6344 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6345 DAG.getVTList(MVT::i32, MVT::i32), Op);
6346 // Merge the pieces into a single i64 value.
6347 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6348 }
6349
6350 return SDValue();
6351}
6352
6353/// getZeroVector - Returns a vector of specified type with all zero elements.
6354/// Zero vectors are used to represent vector negation and in those cases
6355/// will be implemented with the NEON VNEG instruction. However, VNEG does
6356/// not support i64 elements, so sometimes the zero vectors will need to be
6357/// explicitly constructed. Regardless, use a canonical VMOV to create the
6358/// zero vector.
6359static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6360 assert(VT.isVector() && "Expected a vector type");
6361 // The canonical modified immediate encoding of a zero vector is....0!
6362 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6363 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6364 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6365 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6366}
6367
6368/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6369 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
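/// In scalar terms, for a 64-bit value hi:lo shifted right by amt:
///   lo' = amt < 32 ? (lo >> amt) | (hi << (32 - amt)) : hi >> (amt - 32)
///   hi' = amt < 32 ? hi >> amt : (SRA ? hi >> 31 : 0)
/// The CMOVs below select between the "small" and "big" shift cases.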
6370SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6371 SelectionDAG &DAG) const {
6372 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6373 EVT VT = Op.getValueType();
6374 unsigned VTBits = VT.getSizeInBits();
6375 SDLoc dl(Op);
6376 SDValue ShOpLo = Op.getOperand(0);
6377 SDValue ShOpHi = Op.getOperand(1);
6378 SDValue ShAmt = Op.getOperand(2);
6379 SDValue ARMcc;
6380 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6381
6382 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6383
6384 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6385 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6386 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6387 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6388 DAG.getConstant(VTBits, dl, MVT::i32));
6389 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6390 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6391 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6392 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6393 ISD::SETGE, ARMcc, DAG, dl);
6394 SDValue Lo =
6395 DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
6396
6397 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6398 SDValue HiBigShift = Opc == ISD::SRA
6399 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6400 DAG.getConstant(VTBits - 1, dl, VT))
6401 : DAG.getConstant(0, dl, VT);
6402 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6403 ISD::SETGE, ARMcc, DAG, dl);
6404 SDValue Hi =
6405 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6406
6407 SDValue Ops[2] = { Lo, Hi };
6408 return DAG.getMergeValues(Ops, dl);
6409}
6410
6411/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6412 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
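/// In scalar terms, for a 64-bit value hi:lo shifted left by amt:
///   hi' = amt < 32 ? (hi << amt) | (lo >> (32 - amt)) : lo << (amt - 32)
///   lo' = amt < 32 ? lo << amt : 0
/// mirroring the right-shift expansion above.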
6413SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6414 SelectionDAG &DAG) const {
6415 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6416 EVT VT = Op.getValueType();
6417 unsigned VTBits = VT.getSizeInBits();
6418 SDLoc dl(Op);
6419 SDValue ShOpLo = Op.getOperand(0);
6420 SDValue ShOpHi = Op.getOperand(1);
6421 SDValue ShAmt = Op.getOperand(2);
6422 SDValue ARMcc;
6423
6424 assert(Op.getOpcode() == ISD::SHL_PARTS);
6425 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6426 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6427 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6428 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6429 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6430
6431 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6432 DAG.getConstant(VTBits, dl, MVT::i32));
6433 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6434 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6435 ISD::SETGE, ARMcc, DAG, dl);
6436 SDValue Hi =
6437 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6438
6439 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6440 ISD::SETGE, ARMcc, DAG, dl);
6441 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6442 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6443 DAG.getConstant(0, dl, VT), ARMcc, CmpLo);
6444
6445 SDValue Ops[2] = { Lo, Hi };
6446 return DAG.getMergeValues(Ops, dl);
6447}
6448
6449SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6450 SelectionDAG &DAG) const {
6451 // The rounding mode is in bits 23:22 of the FPSCR.
6452 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6453 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
6454 // so that the shift + and get folded into a bitfield extract.
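// For example, an FPSCR RMode field of 0b11 (round toward zero) gives
// ((0b11 << 22) + (1 << 22)) >> 22 = 4, and 4 & 3 = 0, the FLT_ROUNDS value
// for round-toward-zero; mode 0b00 (round to nearest) likewise maps to 1.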
6455 SDLoc dl(Op);
6456 SDValue Chain = Op.getOperand(0);
6457 SDValue Ops[] = {Chain,
6458 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6459
6460 SDValue FPSCR =
6461 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6462 Chain = FPSCR.getValue(1);
6463 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6464 DAG.getConstant(1U << 22, dl, MVT::i32));
6465 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6466 DAG.getConstant(22, dl, MVT::i32));
6467 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6468 DAG.getConstant(3, dl, MVT::i32));
6469 return DAG.getMergeValues({And, Chain}, dl);
6470}
6471
6472SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6473 SelectionDAG &DAG) const {
6474 SDLoc DL(Op);
6475 SDValue Chain = Op->getOperand(0);
6476 SDValue RMValue = Op->getOperand(1);
6477
6478 // The rounding mode is in bits 23:22 of the FPSCR.
6479 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6480 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6481 // (((arg - 1) & 3) << 22).
6482 //
6483 // It is expected that the argument of llvm.set.rounding is within the
6484 // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6485 // responsibility of the code that generates llvm.set.rounding to ensure this
6486 // condition.
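// For example, an argument of 0 (round toward zero) gives (0 - 1) & 3 = 3,
// the ARM RZ encoding, and an argument of 2 (toward +infinity) gives
// (2 - 1) & 3 = 1, the ARM RP encoding.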
6487
6488 // Calculate new value of FPSCR[23:22].
6489 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6490 DAG.getConstant(1, DL, MVT::i32));
6491 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6492 DAG.getConstant(0x3, DL, MVT::i32));
6493 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6494 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6495
6496 // Get current value of FPSCR.
6497 SDValue Ops[] = {Chain,
6498 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6499 SDValue FPSCR =
6500 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6501 Chain = FPSCR.getValue(1);
6502 FPSCR = FPSCR.getValue(0);
6503
6504 // Put new rounding mode into FPSCR[23:22].
6505 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6506 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6507 DAG.getConstant(RMMask, DL, MVT::i32));
6508 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6509 SDValue Ops2[] = {
6510 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6511 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6512}
6513
6514SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6515 SelectionDAG &DAG) const {
6516 SDLoc DL(Op);
6517 SDValue Chain = Op->getOperand(0);
6518 SDValue Mode = Op->getOperand(1);
6519
6520 // Generate nodes to build:
6521 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6522 SDValue Ops[] = {Chain,
6523 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6524 SDValue FPSCR =
6525 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6526 Chain = FPSCR.getValue(1);
6527 FPSCR = FPSCR.getValue(0);
6528
6529 SDValue FPSCRMasked =
6530 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6531 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6532 SDValue InputMasked =
6533 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6534 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6535 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6536
6537 SDValue Ops2[] = {
6538 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6539 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6540}
6541
6542SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6543 SelectionDAG &DAG) const {
6544 SDLoc DL(Op);
6545 SDValue Chain = Op->getOperand(0);
6546
6547 // To get the default FP mode all control bits are cleared:
6548 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6549 SDValue Ops[] = {Chain,
6550 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6551 SDValue FPSCR =
6552 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6553 Chain = FPSCR.getValue(1);
6554 FPSCR = FPSCR.getValue(0);
6555
6556 SDValue FPSCRMasked = DAG.getNode(
6557 ISD::AND, DL, MVT::i32, FPSCR,
6558 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6559 SDValue Ops2[] = {Chain,
6560 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6561 FPSCRMasked};
6562 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6563}
6564
6564 
6565 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6566 const ARMSubtarget *ST) {
6567 SDLoc dl(N);
6568 EVT VT = N->getValueType(0);
6569 if (VT.isVector() && ST->hasNEON()) {
6570
6571 // Compute the least significant set bit: LSB = X & -X
6572 SDValue X = N->getOperand(0);
6573 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6574 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6575
6576 EVT ElemTy = VT.getVectorElementType();
6577
6578 if (ElemTy == MVT::i8) {
6579 // Compute with: cttz(x) = ctpop(lsb - 1)
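// e.g. x = 0b01001000: lsb = x & -x = 0b1000, lsb - 1 = 0b0111, and
// ctpop(0b0111) = 3 == cttz(x).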
6580 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6581 DAG.getTargetConstant(1, dl, ElemTy));
6582 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6583 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6584 }
6585
6586 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6587 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6588 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6589 unsigned NumBits = ElemTy.getSizeInBits();
6590 SDValue WidthMinus1 =
6591 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6592 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6593 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6594 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6595 }
6596
6597 // Compute with: cttz(x) = ctpop(lsb - 1)
6598
6599 // Compute LSB - 1.
6600 SDValue Bits;
6601 if (ElemTy == MVT::i64) {
6602 // Load constant 0xffff'ffff'ffff'ffff to register.
6603 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6604 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6605 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6606 } else {
6607 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6608 DAG.getTargetConstant(1, dl, ElemTy));
6609 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6610 }
6611 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6612 }
6613
6614 if (!ST->hasV6T2Ops())
6615 return SDValue();
6616
6617 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6618 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6619}
6620
6621 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6622 const ARMSubtarget *ST) {
6623 EVT VT = N->getValueType(0);
6624 SDLoc DL(N);
6625
6626 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6627 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6628 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6629 "Unexpected type for custom ctpop lowering");
6630
6631 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6632 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6633 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6634 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6635
6636 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
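// For a v4i32 result, for instance, the v16i8 per-byte counts are combined as
// v16i8 -> vpaddl.u8 -> v8i16 -> vpaddl.u16 -> v4i32, each step adding
// adjacent lanes so the final lanes hold the popcount of the original 32-bit
// elements.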
6637 unsigned EltSize = 8;
6638 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6639 while (EltSize != VT.getScalarSizeInBits()) {
6640 SmallVector<SDValue, 8> Ops;
6641 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6642 TLI.getPointerTy(DAG.getDataLayout())));
6643 Ops.push_back(Res);
6644
6645 EltSize *= 2;
6646 NumElts /= 2;
6647 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6648 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6649 }
6650
6651 return Res;
6652}
6653
6654 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6655/// operand of a vector shift operation, where all the elements of the
6656/// build_vector must have the same constant integer value.
6657static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6658 // Ignore bit_converts.
6659 while (Op.getOpcode() == ISD::BITCAST)
6660 Op = Op.getOperand(0);
6661 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6662 APInt SplatBits, SplatUndef;
6663 unsigned SplatBitSize;
6664 bool HasAnyUndefs;
6665 if (!BVN ||
6666 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6667 ElementBits) ||
6668 SplatBitSize > ElementBits)
6669 return false;
6670 Cnt = SplatBits.getSExtValue();
6671 return true;
6672}
6673
6674/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6675/// operand of a vector shift left operation. That value must be in the range:
6676/// 0 <= Value < ElementBits for a left shift; or
6677/// 0 <= Value <= ElementBits for a long left shift.
6678static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6679 assert(VT.isVector() && "vector shift count is not a vector type");
6680 int64_t ElementBits = VT.getScalarSizeInBits();
6681 if (!getVShiftImm(Op, ElementBits, Cnt))
6682 return false;
6683 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6684}
6685
6686/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6687/// operand of a vector shift right operation. For a shift opcode, the value
6688/// is positive, but for an intrinsic the value count must be negative. The
6689/// absolute value must be in the range:
6690/// 1 <= |Value| <= ElementBits for a right shift; or
6691/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6692static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6693 int64_t &Cnt) {
6694 assert(VT.isVector() && "vector shift count is not a vector type");
6695 int64_t ElementBits = VT.getScalarSizeInBits();
6696 if (!getVShiftImm(Op, ElementBits, Cnt))
6697 return false;
6698 if (!isIntrinsic)
6699 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6700 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6701 Cnt = -Cnt;
6702 return true;
6703 }
6704 return false;
6705}
6706
6707 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6708 const ARMSubtarget *ST) {
6709 EVT VT = N->getValueType(0);
6710 SDLoc dl(N);
6711 int64_t Cnt;
6712
6713 if (!VT.isVector())
6714 return SDValue();
6715
6716 // We essentially have two forms here. Shift by an immediate and shift by a
6717 // vector register (there is also shift by a GPR, but that is just handled
6718 // with a tablegen pattern). We cannot easily match shift by an immediate in
6719 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6720 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6721 // signed or unsigned, and a negative shift indicates a shift right).
6722 if (N->getOpcode() == ISD::SHL) {
6723 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6724 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6725 DAG.getConstant(Cnt, dl, MVT::i32));
6726 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6727 N->getOperand(1));
6728 }
6729
6730 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6731 "unexpected vector shift opcode");
6732
6733 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6734 unsigned VShiftOpc =
6735 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6736 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6737 DAG.getConstant(Cnt, dl, MVT::i32));
6738 }
6739
6740 // Other right shifts we don't have operations for (we use a shift left by a
6741 // negative number).
6742 EVT ShiftVT = N->getOperand(1).getValueType();
6743 SDValue NegatedCount = DAG.getNode(
6744 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6745 unsigned VShiftOpc =
6746 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6747 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6748}
6749
6750 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6751 const ARMSubtarget *ST) {
6752 EVT VT = N->getValueType(0);
6753 SDLoc dl(N);
6754
6755 // We can get here for a node like i32 = ISD::SHL i32, i64
6756 if (VT != MVT::i64)
6757 return SDValue();
6758
6759 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6760 N->getOpcode() == ISD::SHL) &&
6761 "Unknown shift to lower!");
6762
6763 unsigned ShOpc = N->getOpcode();
6764 if (ST->hasMVEIntegerOps()) {
6765 SDValue ShAmt = N->getOperand(1);
6766 unsigned ShPartsOpc = ARMISD::LSLL;
6767 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6768
6769 // If the shift amount is zero, at least 32, or wider than 64 bits, then do
6770 // the default optimisation
6771 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6772 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6773 return SDValue();
6774
6775 // Extract the lower 32 bits of the shift amount if it's not an i32
6776 if (ShAmt->getValueType(0) != MVT::i32)
6777 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6778
6779 if (ShOpc == ISD::SRL) {
6780 if (!Con)
6781 // There is no t2LSRLr instruction so negate and perform an lsll if the
6782 // shift amount is in a register, emulating a right shift.
6783 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6784 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6785 else
6786 // Else generate an lsrl on the immediate shift amount
6787 ShPartsOpc = ARMISD::LSRL;
6788 } else if (ShOpc == ISD::SRA)
6789 ShPartsOpc = ARMISD::ASRL;
6790
6791 // Split Lower/Upper 32 bits of the destination/source
6792 SDValue Lo, Hi;
6793 std::tie(Lo, Hi) =
6794 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6795 // Generate the shift operation as computed above
6796 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6797 ShAmt);
6798 // The upper 32 bits come from the second return value of lsll
6799 Hi = SDValue(Lo.getNode(), 1);
6800 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6801 }
6802
6803 // We only lower SRA and SRL by 1 here; all others use generic lowering.
6804 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6805 return SDValue();
6806
6807 // If we are in thumb mode, we don't have RRX.
6808 if (ST->isThumb1Only())
6809 return SDValue();
6810
6811 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6812 SDValue Lo, Hi;
6813 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6814
6815 // First, build a LSRS1/ASRS1 op, which shifts the top part by one and
6816 // captures the shifted out bit into a carry flag.
6817 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
6818 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
6819
6820 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6821 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6822
6823 // Merge the pieces into a single i64 value.
6824 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6825}
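
// Illustrative standalone sketch (plain C++; the helper name is made up) of
// what the LSRS1 + RRX expansion above computes for a 64-bit logical shift
// right by one: the high word is shifted by one with the dropped bit captured
// as the carry, and RRX rotates that carry into the top of the low word.
#include <cstdint>

static uint64_t srl64ByOneViaRRX(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X);
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  uint32_t Carry = Hi & 1;                    // bit shifted out by LSRS #1
  uint32_t NewHi = Hi >> 1;                   // LSRS Hi, Hi, #1
  uint32_t NewLo = (Lo >> 1) | (Carry << 31); // RRX Lo, Lo
  return (static_cast<uint64_t>(NewHi) << 32) | NewLo;
}
// e.g. srl64ByOneViaRRX(0x0000000300000001ULL) == 0x0000000180000000ULL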
6826
6827 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6828 const ARMSubtarget *ST) {
6829 bool Invert = false;
6830 bool Swap = false;
6831 unsigned Opc = ARMCC::AL;
6832
6833 SDValue Op0 = Op.getOperand(0);
6834 SDValue Op1 = Op.getOperand(1);
6835 SDValue CC = Op.getOperand(2);
6836 EVT VT = Op.getValueType();
6837 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6838 SDLoc dl(Op);
6839
6840 EVT CmpVT;
6841 if (ST->hasNEON())
6842 CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
6843 else {
6844 assert(ST->hasMVEIntegerOps() &&
6845 "No hardware support for integer vector comparison!");
6846
6847 if (Op.getValueType().getVectorElementType() != MVT::i1)
6848 return SDValue();
6849
6850 // Make sure we expand floating point setcc to scalar if we do not have
6851 // mve.fp, so that we can handle them from there.
6852 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6853 return SDValue();
6854
6855 CmpVT = VT;
6856 }
6857
6858 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6859 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6860 // Special-case integer 64-bit equality comparisons. They aren't legal,
6861 // but they can be lowered with a few vector instructions.
6862 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6863 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6864 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6865 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6866 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6867 DAG.getCondCode(ISD::SETEQ));
6868 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6869 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6870 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6871 if (SetCCOpcode == ISD::SETNE)
6872 Merged = DAG.getNOT(dl, Merged, CmpVT);
6873 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6874 return Merged;
6875 }
6876
6877 if (CmpVT.getVectorElementType() == MVT::i64)
6878 // 64-bit comparisons are not legal in general.
6879 return SDValue();
6880
6881 if (Op1.getValueType().isFloatingPoint()) {
6882 switch (SetCCOpcode) {
6883 default: llvm_unreachable("Illegal FP comparison");
6884 case ISD::SETUNE:
6885 case ISD::SETNE:
6886 if (ST->hasMVEFloatOps()) {
6887 Opc = ARMCC::NE; break;
6888 } else {
6889 Invert = true; [[fallthrough]];
6890 }
6891 case ISD::SETOEQ:
6892 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6893 case ISD::SETOLT:
6894 case ISD::SETLT: Swap = true; [[fallthrough]];
6895 case ISD::SETOGT:
6896 case ISD::SETGT: Opc = ARMCC::GT; break;
6897 case ISD::SETOLE:
6898 case ISD::SETLE: Swap = true; [[fallthrough]];
6899 case ISD::SETOGE:
6900 case ISD::SETGE: Opc = ARMCC::GE; break;
6901 case ISD::SETUGE: Swap = true; [[fallthrough]];
6902 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6903 case ISD::SETUGT: Swap = true; [[fallthrough]];
6904 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6905 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6906 case ISD::SETONE: {
6907 // Expand this to (OLT | OGT).
6908 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6909 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6910 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6911 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6912 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6913 if (Invert)
6914 Result = DAG.getNOT(dl, Result, VT);
6915 return Result;
6916 }
6917 case ISD::SETUO: Invert = true; [[fallthrough]];
6918 case ISD::SETO: {
6919 // Expand this to (OLT | OGE).
6920 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6921 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6922 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6923 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6924 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6925 if (Invert)
6926 Result = DAG.getNOT(dl, Result, VT);
6927 return Result;
6928 }
6929 }
6930 } else {
6931 // Integer comparisons.
6932 switch (SetCCOpcode) {
6933 default: llvm_unreachable("Illegal integer comparison");
6934 case ISD::SETNE:
6935 if (ST->hasMVEIntegerOps()) {
6936 Opc = ARMCC::NE; break;
6937 } else {
6938 Invert = true; [[fallthrough]];
6939 }
6940 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6941 case ISD::SETLT: Swap = true; [[fallthrough]];
6942 case ISD::SETGT: Opc = ARMCC::GT; break;
6943 case ISD::SETLE: Swap = true; [[fallthrough]];
6944 case ISD::SETGE: Opc = ARMCC::GE; break;
6945 case ISD::SETULT: Swap = true; [[fallthrough]];
6946 case ISD::SETUGT: Opc = ARMCC::HI; break;
6947 case ISD::SETULE: Swap = true; [[fallthrough]];
6948 case ISD::SETUGE: Opc = ARMCC::HS; break;
6949 }
6950
6951 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6952 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6953 SDValue AndOp;
6954 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6955 AndOp = Op0;
6956 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6957 AndOp = Op1;
6958
6959 // Ignore bitconvert.
6960 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6961 AndOp = AndOp.getOperand(0);
6962
6963 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6964 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6965 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6966 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6967 if (!Invert)
6968 Result = DAG.getNOT(dl, Result, VT);
6969 return Result;
6970 }
6971 }
6972 }
6973
6974 if (Swap)
6975 std::swap(Op0, Op1);
6976
6977 // If one of the operands is a constant vector zero, attempt to fold the
6978 // comparison to a specialized compare-against-zero form.
6979 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6980 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6981 Opc == ARMCC::NE)) {
6982 if (Opc == ARMCC::GE)
6983 Opc = ARMCC::LE;
6984 else if (Opc == ARMCC::GT)
6985 Opc = ARMCC::LT;
6986 std::swap(Op0, Op1);
6987 }
6988
6989 SDValue Result;
6990 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6991 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6992 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6993 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6994 DAG.getConstant(Opc, dl, MVT::i32));
6995 else
6996 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6997 DAG.getConstant(Opc, dl, MVT::i32));
6998
6999 Result = DAG.getSExtOrTrunc(Result, dl, VT);
7000
7001 if (Invert)
7002 Result = DAG.getNOT(dl, Result, VT);
7003
7004 return Result;
7005}
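
// Illustrative standalone sketch (plain C++; the helper name is made up) of
// the 64-bit equality trick above: compare the operands as 32-bit lanes, swap
// each pair of results (VREV64.32) and AND, so a 64-bit lane is all-ones only
// if both of its halves compared equal.
#include <array>
#include <cstdint>

static std::array<uint64_t, 2> v2i64SetEq(std::array<uint64_t, 2> A,
                                          std::array<uint64_t, 2> B) {
  uint32_t Cmp[4], Rev[4];
  for (int i = 0; i < 2; ++i) {
    Cmp[2 * i + 0] = (uint32_t)A[i] == (uint32_t)B[i] ? ~0u : 0u;
    Cmp[2 * i + 1] = (uint32_t)(A[i] >> 32) == (uint32_t)(B[i] >> 32) ? ~0u : 0u;
  }
  for (int i = 0; i < 2; ++i) {  // VREV64.32: swap the two 32-bit lanes per pair
    Rev[2 * i + 0] = Cmp[2 * i + 1];
    Rev[2 * i + 1] = Cmp[2 * i + 0];
  }
  std::array<uint64_t, 2> R;
  for (int i = 0; i < 2; ++i)
    R[i] = ((uint64_t)(Cmp[2 * i + 1] & Rev[2 * i + 1]) << 32) |
           (Cmp[2 * i] & Rev[2 * i]);
  return R;
}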
7006
7007 static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
7008 SDValue LHS = Op.getOperand(0);
7009 SDValue RHS = Op.getOperand(1);
7010 SDValue Carry = Op.getOperand(2);
7011 SDValue Cond = Op.getOperand(3);
7012 SDLoc DL(Op);
7013
7014 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
7015
7016 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
7017 // have to invert the carry first.
7018 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7019 DAG.getConstant(1, DL, MVT::i32), Carry);
7020 // This converts the boolean value carry into the carry flag.
7021 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7022
7023 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
7024 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
7025
7026 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
7027 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
7028 SDValue ARMcc = DAG.getConstant(
7029 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
7030 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
7031 Cmp.getValue(1));
7032}
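
// Illustrative sketch (plain C++; the helper name is made up) of the carry
// inversion above: ISD::USUBO_CARRY supplies a borrow (1 = borrow in), while
// ARMISD::SUBE consumes an ARM-style carry (1 = no borrow), so the incoming
// boolean is flipped with "1 - x" before it is turned into the carry flag.
#include <cstdint>

static uint32_t subWithBorrowIn(uint32_t A, uint32_t B, uint32_t BorrowIn) {
  uint32_t CarryIn = 1u - BorrowIn; // boolean borrow -> ARM carry
  return A - B - (1u - CarryIn);    // SBC semantics: A - B - (1 - C)
}
// e.g. subWithBorrowIn(5, 3, 1) == 1 and subWithBorrowIn(5, 3, 0) == 2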
7033
7034/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
7035/// valid vector constant for a NEON or MVE instruction with a "modified
7036/// immediate" operand (e.g., VMOV). If so, return the encoded value.
7037static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
7038 unsigned SplatBitSize, SelectionDAG &DAG,
7039 const SDLoc &dl, EVT &VT, EVT VectorVT,
7040 VMOVModImmType type) {
7041 unsigned OpCmode, Imm;
7042 bool is128Bits = VectorVT.is128BitVector();
7043
7044 // SplatBitSize is set to the smallest size that splats the vector, so a
7045 // zero vector will always have SplatBitSize == 8. However, NEON modified
7046 // immediate instructions other than VMOV do not support the 8-bit encoding
7047 // of a zero vector, and the default encoding of zero is supposed to be the
7048 // 32-bit version.
7049 if (SplatBits == 0)
7050 SplatBitSize = 32;
7051
7052 switch (SplatBitSize) {
7053 case 8:
7054 if (type != VMOVModImm)
7055 return SDValue();
7056 // Any 1-byte value is OK. Op=0, Cmode=1110.
7057 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
7058 OpCmode = 0xe;
7059 Imm = SplatBits;
7060 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
7061 break;
7062
7063 case 16:
7064 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
7065 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
7066 if ((SplatBits & ~0xff) == 0) {
7067 // Value = 0x00nn: Op=x, Cmode=100x.
7068 OpCmode = 0x8;
7069 Imm = SplatBits;
7070 break;
7071 }
7072 if ((SplatBits & ~0xff00) == 0) {
7073 // Value = 0xnn00: Op=x, Cmode=101x.
7074 OpCmode = 0xa;
7075 Imm = SplatBits >> 8;
7076 break;
7077 }
7078 return SDValue();
7079
7080 case 32:
7081 // NEON's 32-bit VMOV supports splat values where:
7082 // * only one byte is nonzero, or
7083 // * the least significant byte is 0xff and the second byte is nonzero, or
7084 // * the least significant 2 bytes are 0xff and the third is nonzero.
7085 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7086 if ((SplatBits & ~0xff) == 0) {
7087 // Value = 0x000000nn: Op=x, Cmode=000x.
7088 OpCmode = 0;
7089 Imm = SplatBits;
7090 break;
7091 }
7092 if ((SplatBits & ~0xff00) == 0) {
7093 // Value = 0x0000nn00: Op=x, Cmode=001x.
7094 OpCmode = 0x2;
7095 Imm = SplatBits >> 8;
7096 break;
7097 }
7098 if ((SplatBits & ~0xff0000) == 0) {
7099 // Value = 0x00nn0000: Op=x, Cmode=010x.
7100 OpCmode = 0x4;
7101 Imm = SplatBits >> 16;
7102 break;
7103 }
7104 if ((SplatBits & ~0xff000000) == 0) {
7105 // Value = 0xnn000000: Op=x, Cmode=011x.
7106 OpCmode = 0x6;
7107 Imm = SplatBits >> 24;
7108 break;
7109 }
7110
7111 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7112 if (type == OtherModImm) return SDValue();
7113
7114 if ((SplatBits & ~0xffff) == 0 &&
7115 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7116 // Value = 0x0000nnff: Op=x, Cmode=1100.
7117 OpCmode = 0xc;
7118 Imm = SplatBits >> 8;
7119 break;
7120 }
7121
7122 // cmode == 0b1101 is not supported for MVE VMVN
7123 if (type == MVEVMVNModImm)
7124 return SDValue();
7125
7126 if ((SplatBits & ~0xffffff) == 0 &&
7127 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7128 // Value = 0x00nnffff: Op=x, Cmode=1101.
7129 OpCmode = 0xd;
7130 Imm = SplatBits >> 16;
7131 break;
7132 }
7133
7134 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7135 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7136 // VMOV.I32. A (very) minor optimization would be to replicate the value
7137 // and fall through here to test for a valid 64-bit splat. But, then the
7138 // caller would also need to check and handle the change in size.
7139 return SDValue();
7140
7141 case 64: {
7142 if (type != VMOVModImm)
7143 return SDValue();
7144 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7145 uint64_t BitMask = 0xff;
7146 unsigned ImmMask = 1;
7147 Imm = 0;
7148 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7149 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7150 Imm |= ImmMask;
7151 } else if ((SplatBits & BitMask) != 0) {
7152 return SDValue();
7153 }
7154 BitMask <<= 8;
7155 ImmMask <<= 1;
7156 }
7157
7158 // Op=1, Cmode=1110.
7159 OpCmode = 0x1e;
7160 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7161 break;
7162 }
7163
7164 default:
7165 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7166 }
7167
7168 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7169 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7170}
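
// Illustrative standalone sketch (plain C++; the helper name is made up) of
// the 32-bit VMOVModImm cases above, with SplatUndef taken as zero: it reports
// the Cmode/Imm8 pair when a 32-bit splat fits one of the "modified immediate"
// forms, and rejects everything else.
#include <cstdint>

static bool isVMOVModImm32(uint32_t Splat, unsigned &Cmode, unsigned &Imm8) {
  if ((Splat & ~0xffu) == 0)       { Cmode = 0x0; Imm8 = Splat;       return true; }
  if ((Splat & ~0xff00u) == 0)     { Cmode = 0x2; Imm8 = Splat >> 8;  return true; }
  if ((Splat & ~0xff0000u) == 0)   { Cmode = 0x4; Imm8 = Splat >> 16; return true; }
  if ((Splat & ~0xff000000u) == 0) { Cmode = 0x6; Imm8 = Splat >> 24; return true; }
  if ((Splat & ~0xffffu) == 0 && (Splat & 0xff) == 0xff) {
    Cmode = 0xc; Imm8 = Splat >> 8; return true;   // 0x0000nnff
  }
  if ((Splat & ~0xffffffu) == 0 && (Splat & 0xffff) == 0xffff) {
    Cmode = 0xd; Imm8 = Splat >> 16; return true;  // 0x00nnffff
  }
  return false; // e.g. 0x00ffff00 has no VMOV.I32 encoding (see the note above)
}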
7171
7172SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7173 const ARMSubtarget *ST) const {
7174 EVT VT = Op.getValueType();
7175 bool IsDouble = (VT == MVT::f64);
7176 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7177 const APFloat &FPVal = CFP->getValueAPF();
7178
7179 // Prevent floating-point constants from using literal loads
7180 // when execute-only is enabled.
7181 if (ST->genExecuteOnly()) {
7182 // We shouldn't trigger this for v6m execute-only
7183 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7184 "Unexpected architecture");
7185
7186 // If we can represent the constant as an immediate, don't lower it
7187 if (isFPImmLegal(FPVal, VT))
7188 return Op;
7189 // Otherwise, construct as integer, and move to float register
7190 APInt INTVal = FPVal.bitcastToAPInt();
7191 SDLoc DL(CFP);
7192 switch (VT.getSimpleVT().SimpleTy) {
7193 default:
7194 llvm_unreachable("Unknown floating point type!");
7195 break;
7196 case MVT::f64: {
7197 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7198 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7199 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7200 }
7201 case MVT::f32:
7202 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7203 DAG.getConstant(INTVal, DL, MVT::i32));
7204 }
7205 }
7206
7207 if (!ST->hasVFP3Base())
7208 return SDValue();
7209
7210 // Use the default (constant pool) lowering for double constants when we have
7211 // an SP-only FPU
7212 if (IsDouble && !Subtarget->hasFP64())
7213 return SDValue();
7214
7215 // Try splatting with a VMOV.f32...
7216 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7217
7218 if (ImmVal != -1) {
7219 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7220 // We have code in place to select a valid ConstantFP already, no need to
7221 // do any mangling.
7222 return Op;
7223 }
7224
7225 // It's a float and we are trying to use NEON operations where
7226 // possible. Lower it to a splat followed by an extract.
7227 SDLoc DL(Op);
7228 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7229 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7230 NewVal);
7231 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7232 DAG.getConstant(0, DL, MVT::i32));
7233 }
7234
7235 // The rest of our options are NEON only, make sure that's allowed before
7236 // proceeding..
7237 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7238 return SDValue();
7239
7240 EVT VMovVT;
7241 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7242
7243 // It wouldn't really be worth bothering for doubles except for one very
7244 // important value, which does happen to match: 0.0. So make sure we don't do
7245 // anything stupid.
7246 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7247 return SDValue();
7248
7249 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7250 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7251 VMovVT, VT, VMOVModImm);
7252 if (NewVal != SDValue()) {
7253 SDLoc DL(Op);
7254 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7255 NewVal);
7256 if (IsDouble)
7257 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7258
7259 // It's a float: cast and extract a vector element.
7260 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7261 VecConstant);
7262 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7263 DAG.getConstant(0, DL, MVT::i32));
7264 }
7265
7266 // Finally, try a VMVN.i32
7267 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7268 VT, VMVNModImm);
7269 if (NewVal != SDValue()) {
7270 SDLoc DL(Op);
7271 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7272
7273 if (IsDouble)
7274 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7275
7276 // It's a float: cast and extract a vector element.
7277 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7278 VecConstant);
7279 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7280 DAG.getConstant(0, DL, MVT::i32));
7281 }
7282
7283 return SDValue();
7284}
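
// Illustrative sketch (plain C++; the helper name is made up, and the value
// range is an assumption based on the VFP floating-point immediate format):
// the constants that can avoid a constant-pool load via a plain VMOV.f32 are
// roughly +/-(1 + m/16) * 2^e with m in [0,15] and e in [-3,4]. Enumerating
// them is enough to see which ConstantFPs the ImmVal test above can accept.
#include <cmath>

static bool looksLikeVMOVFP32Imm(float F) {
  for (int Sign = -1; Sign <= 1; Sign += 2)
    for (int E = -3; E <= 4; ++E)
      for (int M = 0; M <= 15; ++M)
        if (F == Sign * std::ldexp(1.0f + M / 16.0f, E))
          return true;
  return false;
}
// e.g. looksLikeVMOVFP32Imm(0.5f) and looksLikeVMOVFP32Imm(-31.0f) are true,
//      looksLikeVMOVFP32Imm(0.1f) is false.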
7285
7286 // Check whether a VEXT instruction can handle the shuffle mask when the
7287// vector sources of the shuffle are the same.
7288static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7289 unsigned NumElts = VT.getVectorNumElements();
7290
7291 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7292 if (M[0] < 0)
7293 return false;
7294
7295 Imm = M[0];
7296
7297 // If this is a VEXT shuffle, the immediate value is the index of the first
7298 // element. The other shuffle indices must be the successive elements after
7299 // the first one.
7300 unsigned ExpectedElt = Imm;
7301 for (unsigned i = 1; i < NumElts; ++i) {
7302 // Increment the expected index. If it wraps around, just follow it
7303 // back to index zero and keep going.
7304 ++ExpectedElt;
7305 if (ExpectedElt == NumElts)
7306 ExpectedElt = 0;
7307
7308 if (M[i] < 0) continue; // ignore UNDEF indices
7309 if (ExpectedElt != static_cast<unsigned>(M[i]))
7310 return false;
7311 }
7312
7313 return true;
7314}
7315
7316static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7317 bool &ReverseVEXT, unsigned &Imm) {
7318 unsigned NumElts = VT.getVectorNumElements();
7319 ReverseVEXT = false;
7320
7321 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7322 if (M[0] < 0)
7323 return false;
7324
7325 Imm = M[0];
7326
7327 // If this is a VEXT shuffle, the immediate value is the index of the first
7328 // element. The other shuffle indices must be the successive elements after
7329 // the first one.
7330 unsigned ExpectedElt = Imm;
7331 for (unsigned i = 1; i < NumElts; ++i) {
7332 // Increment the expected index. If it wraps around, it may still be
7333 // a VEXT but the source vectors must be swapped.
7334 ExpectedElt += 1;
7335 if (ExpectedElt == NumElts * 2) {
7336 ExpectedElt = 0;
7337 ReverseVEXT = true;
7338 }
7339
7340 if (M[i] < 0) continue; // ignore UNDEF indices
7341 if (ExpectedElt != static_cast<unsigned>(M[i]))
7342 return false;
7343 }
7344
7345 // Adjust the index value if the source operands will be swapped.
7346 if (ReverseVEXT)
7347 Imm -= NumElts;
7348
7349 return true;
7350}
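
// Illustrative standalone sketch (plain C++; the helper name is made up) of
// the check above: a VEXT mask is a window of consecutive elements taken from
// the concatenation v1:v2. For 8 lanes, <3,4,5,6,7,8,9,10> is VEXT #3, while
// <13,14,15,0,1,2,3,4> wraps past v1:v2, so the sources are swapped and the
// immediate becomes 13 - 8 = 5.
#include <vector>

static bool isVEXTMaskSketch(const std::vector<int> &M, unsigned NumElts,
                             bool &ReverseVEXT, unsigned &Imm) {
  ReverseVEXT = false;
  if (M.size() != NumElts || M[0] < 0)
    return false;
  Imm = M[0];
  unsigned Expected = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (++Expected == NumElts * 2) { // wrapped past v1:v2 => swap the sources
      Expected = 0;
      ReverseVEXT = true;
    }
    if (M[i] >= 0 && (unsigned)M[i] != Expected)
      return false;
  }
  if (ReverseVEXT)
    Imm -= NumElts;
  return true;
}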
7351
7352static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7353 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7354 // range, then 0 is placed into the resulting vector. So pretty much any mask
7355 // of 8 elements can work here.
7356 return VT == MVT::v8i8 && M.size() == 8;
7357}
7358
7359static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7360 unsigned Index) {
7361 if (Mask.size() == Elements * 2)
7362 return Index / Elements;
7363 return Mask[Index] == 0 ? 0 : 1;
7364}
7365
7366// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7367// checking that pairs of elements in the shuffle mask represent the same index
7368// in each vector, incrementing the expected index by 2 at each step.
7369// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7370// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7371// v2={e,f,g,h}
7372// WhichResult gives the offset for each element in the mask based on which
7373// of the two results it belongs to.
7374//
7375// The transpose can be represented either as:
7376// result1 = shufflevector v1, v2, result1_shuffle_mask
7377// result2 = shufflevector v1, v2, result2_shuffle_mask
7378// where v1/v2 and the shuffle masks have the same number of elements
7379// (here WhichResult (see below) indicates which result is being checked)
7380//
7381// or as:
7382// results = shufflevector v1, v2, shuffle_mask
7383// where both results are returned in one vector and the shuffle mask has twice
7384// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
7385// want to check the low half and high half of the shuffle mask as if it were
7386// the other case
7387static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7388 unsigned EltSz = VT.getScalarSizeInBits();
7389 if (EltSz == 64)
7390 return false;
7391
7392 unsigned NumElts = VT.getVectorNumElements();
7393 if (M.size() != NumElts && M.size() != NumElts*2)
7394 return false;
7395
7396 // If the mask is twice as long as the input vector then we need to check the
7397 // upper and lower parts of the mask with a matching value for WhichResult
7398 // FIXME: A mask with only even values will be rejected in case the first
7399 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7400 // M[0] is used to determine WhichResult
7401 for (unsigned i = 0; i < M.size(); i += NumElts) {
7402 WhichResult = SelectPairHalf(NumElts, M, i);
7403 for (unsigned j = 0; j < NumElts; j += 2) {
7404 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7405 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7406 return false;
7407 }
7408 }
7409
7410 if (M.size() == NumElts*2)
7411 WhichResult = 0;
7412
7413 return true;
7414}
7415
7416/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7417/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7418/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7419static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7420 unsigned EltSz = VT.getScalarSizeInBits();
7421 if (EltSz == 64)
7422 return false;
7423
7424 unsigned NumElts = VT.getVectorNumElements();
7425 if (M.size() != NumElts && M.size() != NumElts*2)
7426 return false;
7427
7428 for (unsigned i = 0; i < M.size(); i += NumElts) {
7429 WhichResult = SelectPairHalf(NumElts, M, i);
7430 for (unsigned j = 0; j < NumElts; j += 2) {
7431 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7432 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7433 return false;
7434 }
7435 }
7436
7437 if (M.size() == NumElts*2)
7438 WhichResult = 0;
7439
7440 return true;
7441}
7442
7443// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7444// that the mask elements are either all even and in steps of size 2 or all odd
7445// and in steps of size 2.
7446// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7447// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7448// v2={e,f,g,h}
7449 // Requires similar checks to those of isVTRNMask with
7450 // respect to how the results are returned.
7451static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7452 unsigned EltSz = VT.getScalarSizeInBits();
7453 if (EltSz == 64)
7454 return false;
7455
7456 unsigned NumElts = VT.getVectorNumElements();
7457 if (M.size() != NumElts && M.size() != NumElts*2)
7458 return false;
7459
7460 for (unsigned i = 0; i < M.size(); i += NumElts) {
7461 WhichResult = SelectPairHalf(NumElts, M, i);
7462 for (unsigned j = 0; j < NumElts; ++j) {
7463 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7464 return false;
7465 }
7466 }
7467
7468 if (M.size() == NumElts*2)
7469 WhichResult = 0;
7470
7471 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7472 if (VT.is64BitVector() && EltSz == 32)
7473 return false;
7474
7475 return true;
7476}
7477
7478/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7479/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7480 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7481static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7482 unsigned EltSz = VT.getScalarSizeInBits();
7483 if (EltSz == 64)
7484 return false;
7485
7486 unsigned NumElts = VT.getVectorNumElements();
7487 if (M.size() != NumElts && M.size() != NumElts*2)
7488 return false;
7489
7490 unsigned Half = NumElts / 2;
7491 for (unsigned i = 0; i < M.size(); i += NumElts) {
7492 WhichResult = SelectPairHalf(NumElts, M, i);
7493 for (unsigned j = 0; j < NumElts; j += Half) {
7494 unsigned Idx = WhichResult;
7495 for (unsigned k = 0; k < Half; ++k) {
7496 int MIdx = M[i + j + k];
7497 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7498 return false;
7499 Idx += 2;
7500 }
7501 }
7502 }
7503
7504 if (M.size() == NumElts*2)
7505 WhichResult = 0;
7506
7507 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7508 if (VT.is64BitVector() && EltSz == 32)
7509 return false;
7510
7511 return true;
7512}
7513
7514// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7515// that pairs of elements of the shufflemask represent the same index in each
7516// vector incrementing sequentially through the vectors.
7517// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7518// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7519// v2={e,f,g,h}
7520 // Requires similar checks to those of isVTRNMask with respect to how the
7521 // results are returned.
7522static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7523 unsigned EltSz = VT.getScalarSizeInBits();
7524 if (EltSz == 64)
7525 return false;
7526
7527 unsigned NumElts = VT.getVectorNumElements();
7528 if (M.size() != NumElts && M.size() != NumElts*2)
7529 return false;
7530
7531 for (unsigned i = 0; i < M.size(); i += NumElts) {
7532 WhichResult = SelectPairHalf(NumElts, M, i);
7533 unsigned Idx = WhichResult * NumElts / 2;
7534 for (unsigned j = 0; j < NumElts; j += 2) {
7535 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7536 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7537 return false;
7538 Idx += 1;
7539 }
7540 }
7541
7542 if (M.size() == NumElts*2)
7543 WhichResult = 0;
7544
7545 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7546 if (VT.is64BitVector() && EltSz == 32)
7547 return false;
7548
7549 return true;
7550}
7551
7552/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7553/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7554/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7555static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7556 unsigned EltSz = VT.getScalarSizeInBits();
7557 if (EltSz == 64)
7558 return false;
7559
7560 unsigned NumElts = VT.getVectorNumElements();
7561 if (M.size() != NumElts && M.size() != NumElts*2)
7562 return false;
7563
7564 for (unsigned i = 0; i < M.size(); i += NumElts) {
7565 WhichResult = SelectPairHalf(NumElts, M, i);
7566 unsigned Idx = WhichResult * NumElts / 2;
7567 for (unsigned j = 0; j < NumElts; j += 2) {
7568 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7569 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7570 return false;
7571 Idx += 1;
7572 }
7573 }
7574
7575 if (M.size() == NumElts*2)
7576 WhichResult = 0;
7577
7578 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7579 if (VT.is64BitVector() && EltSz == 32)
7580 return false;
7581
7582 return true;
7583}
7584
7585/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7586/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7587static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7588 unsigned &WhichResult,
7589 bool &isV_UNDEF) {
7590 isV_UNDEF = false;
7591 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7592 return ARMISD::VTRN;
7593 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7594 return ARMISD::VUZP;
7595 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7596 return ARMISD::VZIP;
7597
7598 isV_UNDEF = true;
7599 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7600 return ARMISD::VTRN;
7601 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7602 return ARMISD::VUZP;
7603 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7604 return ARMISD::VZIP;
7605
7606 return 0;
7607}
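
// Worked example as a standalone sketch (plain C++; the helper names are made
// up): the three two-result shuffles recognised above, applied to 4-lane
// vectors. With A = {a,b,c,d} and B = {e,f,g,h} this produces
//   vtrn4 -> {a,e,c,g} / {b,f,d,h}   (masks <0,4,2,6> and <1,5,3,7>)
//   vuzp4 -> {a,c,e,g} / {b,d,f,h}   (masks <0,2,4,6> and <1,3,5,7>)
//   vzip4 -> {a,e,b,f} / {c,g,d,h}   (masks <0,4,1,5> and <2,6,3,7>)
#include <array>
#include <utility>

using V4 = std::array<int, 4>;

static std::pair<V4, V4> vtrn4(V4 A, V4 B) {
  return {{A[0], B[0], A[2], B[2]}, {A[1], B[1], A[3], B[3]}};
}
static std::pair<V4, V4> vuzp4(V4 A, V4 B) {
  return {{A[0], A[2], B[0], B[2]}, {A[1], A[3], B[1], B[3]}};
}
static std::pair<V4, V4> vzip4(V4 A, V4 B) {
  return {{A[0], B[0], A[1], B[1]}, {A[2], B[2], A[3], B[3]}};
}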
7608
7609 /// \return true if this is a reverse operation on a vector.
7610static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7611 unsigned NumElts = VT.getVectorNumElements();
7612 // Make sure the mask has the right size.
7613 if (NumElts != M.size())
7614 return false;
7615
7616 // Look for <15, ..., 3, -1, 1, 0>.
7617 for (unsigned i = 0; i != NumElts; ++i)
7618 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7619 return false;
7620
7621 return true;
7622}
7623
7624static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7625 unsigned NumElts = VT.getVectorNumElements();
7626 // Make sure the mask has the right size.
7627 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7628 return false;
7629
7630 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7631 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7632 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7633 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7634 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7635 int Ofs = Top ? 1 : 0;
7636 int Upper = SingleSource ? 0 : NumElts;
7637 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7638 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7639 return false;
7640 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7641 return false;
7642 }
7643 return true;
7644}
7645
7646static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7647 unsigned NumElts = VT.getVectorNumElements();
7648 // Make sure the mask has the right size.
7649 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7650 return false;
7651
7652 // If Top
7653 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7654 // This inserts Input2 into Input1
7655 // else if not Top
7656 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7657 // This inserts Input1 into Input2
7658 unsigned Offset = Top ? 0 : 1;
7659 unsigned N = SingleSource ? 0 : NumElts;
7660 for (unsigned i = 0; i < NumElts; i += 2) {
7661 if (M[i] >= 0 && M[i] != (int)i)
7662 return false;
7663 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7664 return false;
7665 }
7666
7667 return true;
7668}
7669
7670static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7671 unsigned NumElts = ToVT.getVectorNumElements();
7672 if (NumElts != M.size())
7673 return false;
7674
7675 // Test whether the Trunc can be converted to a VMOVN with this shuffle. We are
7676 // looking for patterns of:
7677 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7678 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7679
7680 unsigned Off0 = rev ? NumElts / 2 : 0;
7681 unsigned Off1 = rev ? 0 : NumElts / 2;
7682 for (unsigned i = 0; i < NumElts; i += 2) {
7683 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7684 return false;
7685 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7686 return false;
7687 }
7688
7689 return true;
7690}
7691
7692// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7693// from a pair of inputs. For example:
7694// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7695// FP_ROUND(EXTRACT_ELT(Y, 0),
7696// FP_ROUND(EXTRACT_ELT(X, 1),
7697// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7698 static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7699 const ARMSubtarget *ST) {
7700 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7701 if (!ST->hasMVEFloatOps())
7702 return SDValue();
7703
7704 SDLoc dl(BV);
7705 EVT VT = BV.getValueType();
7706 if (VT != MVT::v8f16)
7707 return SDValue();
7708
7709 // We are looking for a buildvector of fptrunc elements, where all the
7710 // elements are interleavingly extracted from two sources. Check the first two
7711 // items are valid enough and extract some info from them (they are checked
7712 // properly in the loop below).
7713 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7714 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7715 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7716 return SDValue();
7717 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7718 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7719 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7720 return SDValue();
7721 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7722 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7723 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7724 return SDValue();
7725
7726 // Check all the values in the BuildVector line up with our expectations.
7727 for (unsigned i = 1; i < 4; i++) {
7728 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7729 return Trunc.getOpcode() == ISD::FP_ROUND &&
7730 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7731 Trunc.getOperand(0).getOperand(0) == Op &&
7732 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7733 };
7734 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7735 return SDValue();
7736 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7737 return SDValue();
7738 }
7739
7740 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7741 DAG.getConstant(0, dl, MVT::i32));
7742 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7743 DAG.getConstant(1, dl, MVT::i32));
7744}
7745
7746// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7747// from a single input on alternating lanes. For example:
7748 // BUILDVECTOR(FP_EXTEND(EXTRACT_ELT(X, 0),
7749 // FP_EXTEND(EXTRACT_ELT(X, 2),
7750 // FP_EXTEND(EXTRACT_ELT(X, 4), ...)
7751 static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7752 const ARMSubtarget *ST) {
7753 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7754 if (!ST->hasMVEFloatOps())
7755 return SDValue();
7756
7757 SDLoc dl(BV);
7758 EVT VT = BV.getValueType();
7759 if (VT != MVT::v4f32)
7760 return SDValue();
7761
7762 // We are looking for a buildvector of fpext elements, where all the
7763 // elements are alternating lanes from a single source. For example <0,2,4,6>
7764 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7765 // info from them (they are checked properly in the loop below).
7766 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7767 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7768 return SDValue();
7769 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7770 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7771 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7772 return SDValue();
7773
7774 // Check all the values in the BuildVector line up with our expectations.
7775 for (unsigned i = 1; i < 4; i++) {
7776 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7777 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7778 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7779 Trunc.getOperand(0).getOperand(0) == Op &&
7780 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7781 };
7782 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7783 return SDValue();
7784 }
7785
7786 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7787 DAG.getConstant(Offset, dl, MVT::i32));
7788}
7789
7790// If N is an integer constant that can be moved into a register in one
7791// instruction, return an SDValue of such a constant (will become a MOV
7792// instruction). Otherwise return null.
7793 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7794 const ARMSubtarget *ST, const SDLoc &dl) {
7795 uint64_t Val;
7796 if (!isa<ConstantSDNode>(N))
7797 return SDValue();
7798 Val = N->getAsZExtVal();
7799
7800 if (ST->isThumb1Only()) {
7801 if (Val <= 255 || ~Val <= 255)
7802 return DAG.getConstant(Val, dl, MVT::i32);
7803 } else {
7804 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7805 return DAG.getConstant(Val, dl, MVT::i32);
7806 }
7807 return SDValue();
7808}
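
// Illustrative standalone sketch (plain C++; helper names are made up) of the
// ARM-mode test used above: a value fits a single MOV (or MVN, via its
// complement) when it is an 8-bit value rotated right by an even amount,
// which is essentially what ARM_AM::getSOImmVal checks.
#include <cstdint>

static bool isARMSOImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    uint32_t Undone = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V; // rotate left
    if (Undone <= 0xff)
      return true;
  }
  return false;
}

static bool fitsSingleMovARM(uint32_t V) {
  return isARMSOImm(V) || isARMSOImm(~V); // MOV imm or MVN imm
}
// e.g. fitsSingleMovARM(0x00ff0000u) is true, fitsSingleMovARM(0x00ffff00u) is false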
7809
7810 static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7811 const ARMSubtarget *ST) {
7812 SDLoc dl(Op);
7813 EVT VT = Op.getValueType();
7814
7815 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7816
7817 unsigned NumElts = VT.getVectorNumElements();
7818 unsigned BoolMask;
7819 unsigned BitsPerBool;
7820 if (NumElts == 2) {
7821 BitsPerBool = 8;
7822 BoolMask = 0xff;
7823 } else if (NumElts == 4) {
7824 BitsPerBool = 4;
7825 BoolMask = 0xf;
7826 } else if (NumElts == 8) {
7827 BitsPerBool = 2;
7828 BoolMask = 0x3;
7829 } else if (NumElts == 16) {
7830 BitsPerBool = 1;
7831 BoolMask = 0x1;
7832 } else
7833 return SDValue();
7834
7835 // If this is a single value copied into all lanes (a splat), we can just sign
7836 // extend that single value
7837 SDValue FirstOp = Op.getOperand(0);
7838 if (!isa<ConstantSDNode>(FirstOp) &&
7839 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7840 return U.get().isUndef() || U.get() == FirstOp;
7841 })) {
7842 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7843 DAG.getValueType(MVT::i1));
7844 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7845 }
7846
7847 // First create base with bits set where known
7848 unsigned Bits32 = 0;
7849 for (unsigned i = 0; i < NumElts; ++i) {
7850 SDValue V = Op.getOperand(i);
7851 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7852 continue;
7853 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7854 if (BitSet)
7855 Bits32 |= BoolMask << (i * BitsPerBool);
7856 }
7857
7858 // Add in unknown nodes
7859 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7860 DAG.getConstant(Bits32, dl, MVT::i32));
7861 for (unsigned i = 0; i < NumElts; ++i) {
7862 SDValue V = Op.getOperand(i);
7863 if (isa<ConstantSDNode>(V) || V.isUndef())
7864 continue;
7865 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7866 DAG.getConstant(i, dl, MVT::i32));
7867 }
7868
7869 return Base;
7870}
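
// Illustrative standalone sketch (plain C++; the helper name is made up) of
// the constant packing above: each boolean lane is replicated BitsPerBool
// times so that the 16 MVE predicate bits are fully populated (a v4i1 lane
// owns 4 bits and uses mask 0xf, a v16i1 lane owns a single bit).
#include <cstdint>
#include <vector>

static uint16_t packPredicateConstant(const std::vector<bool> &Lanes) {
  unsigned NumElts = Lanes.size();             // assumed to be 2, 4, 8 or 16
  unsigned BitsPerBool = 16 / NumElts;
  uint32_t BoolMask = (1u << BitsPerBool) - 1;
  uint32_t Bits = 0;
  for (unsigned i = 0; i < NumElts; ++i)
    if (Lanes[i])
      Bits |= BoolMask << (i * BitsPerBool);
  return static_cast<uint16_t>(Bits);
}
// e.g. packPredicateConstant({true, false, true, true}) == 0xff0f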
7871
7872 static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7873 const ARMSubtarget *ST) {
7874 if (!ST->hasMVEIntegerOps())
7875 return SDValue();
7876
7877 // We are looking for a buildvector where each element is Op[0] + i*N
7878 EVT VT = Op.getValueType();
7879 SDValue Op0 = Op.getOperand(0);
7880 unsigned NumElts = VT.getVectorNumElements();
7881
7882 // Get the increment value from operand 1
7883 SDValue Op1 = Op.getOperand(1);
7884 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7885 !isa<ConstantSDNode>(Op1.getOperand(1)))
7886 return SDValue();
7887 unsigned N = Op1.getConstantOperandVal(1);
7888 if (N != 1 && N != 2 && N != 4 && N != 8)
7889 return SDValue();
7890
7891 // Check that each other operand matches
7892 for (unsigned I = 2; I < NumElts; I++) {
7893 SDValue OpI = Op.getOperand(I);
7894 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7895 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7896 OpI.getConstantOperandVal(1) != I * N)
7897 return SDValue();
7898 }
7899
7900 SDLoc DL(Op);
7901 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7902 DAG.getConstant(N, DL, MVT::i32));
7903}
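
// Illustrative standalone sketch (plain C++; the helper name is made up) of
// the pattern matched above: a build vector can become a VIDUP when its lanes
// are Base, Base+N, Base+2N, ... with a step N of 1, 2, 4 or 8.
#include <vector>

static bool isVIDUPSeries(const std::vector<unsigned> &Lanes, unsigned &Step) {
  if (Lanes.size() < 2 || Lanes[1] < Lanes[0])
    return false;
  Step = Lanes[1] - Lanes[0];
  if (Step != 1 && Step != 2 && Step != 4 && Step != 8)
    return false;
  for (unsigned I = 2; I < Lanes.size(); ++I)
    if (Lanes[I] - Lanes[0] != I * Step)
      return false;
  return true;
}
// e.g. {10, 12, 14, 16} matches with Step == 2; {10, 13, 16, 19} does not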
7904
7905 // Returns true if the operation N can be treated as a qr instruction variant
7906 // at operand Op.
7907static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7908 switch (N->getOpcode()) {
7909 case ISD::ADD:
7910 case ISD::MUL:
7911 case ISD::SADDSAT:
7912 case ISD::UADDSAT:
7913 case ISD::AVGFLOORS:
7914 case ISD::AVGFLOORU:
7915 return true;
7916 case ISD::SUB:
7917 case ISD::SSUBSAT:
7918 case ISD::USUBSAT:
7919 return N->getOperand(1).getNode() == Op;
7920 case ISD::INTRINSIC_WO_CHAIN:
7921 switch (N->getConstantOperandVal(0)) {
7922 case Intrinsic::arm_mve_add_predicated:
7923 case Intrinsic::arm_mve_mul_predicated:
7924 case Intrinsic::arm_mve_qadd_predicated:
7925 case Intrinsic::arm_mve_vhadd:
7926 case Intrinsic::arm_mve_hadd_predicated:
7927 case Intrinsic::arm_mve_vqdmulh:
7928 case Intrinsic::arm_mve_qdmulh_predicated:
7929 case Intrinsic::arm_mve_vqrdmulh:
7930 case Intrinsic::arm_mve_qrdmulh_predicated:
7931 case Intrinsic::arm_mve_vqdmull:
7932 case Intrinsic::arm_mve_vqdmull_predicated:
7933 return true;
7934 case Intrinsic::arm_mve_sub_predicated:
7935 case Intrinsic::arm_mve_qsub_predicated:
7936 case Intrinsic::arm_mve_vhsub:
7937 case Intrinsic::arm_mve_hsub_predicated:
7938 return N->getOperand(2).getNode() == Op;
7939 default:
7940 return false;
7941 }
7942 default:
7943 return false;
7944 }
7945}
7946
7947// If this is a case we can't handle, return null and let the default
7948// expansion code take care of it.
7949SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7950 const ARMSubtarget *ST) const {
7951 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7952 SDLoc dl(Op);
7953 EVT VT = Op.getValueType();
7954
7955 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7956 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7957
7958 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7959 return R;
7960
7961 APInt SplatBits, SplatUndef;
7962 unsigned SplatBitSize;
7963 bool HasAnyUndefs;
7964 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7965 if (SplatUndef.isAllOnes())
7966 return DAG.getUNDEF(VT);
7967
7968 // If all the users of this constant splat are qr instruction variants,
7969 // generate a vdup of the constant.
7970 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7971 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7972 all_of(BVN->users(),
7973 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7974 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7975 : SplatBitSize == 16 ? MVT::v8i16
7976 : MVT::v16i8;
7977 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7978 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7979 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7980 }
7981
7982 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7983 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7984 // Check if an immediate VMOV works.
7985 EVT VmovVT;
7986 SDValue Val =
7987 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7988 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7989
7990 if (Val.getNode()) {
7991 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7992 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7993 }
7994
7995 // Try an immediate VMVN.
7996 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7997 Val = isVMOVModifiedImm(
7998 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7999 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
8000 if (Val.getNode()) {
8001 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
8002 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
8003 }
8004
8005 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
8006 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
8007 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
8008 if (ImmVal != -1) {
8009 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
8010 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
8011 }
8012 }
8013
8014 // If we are under MVE, generate a VDUP(constant), bitcast to the original
8015 // type.
8016 if (ST->hasMVEIntegerOps() &&
8017 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
8018 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
8019 : SplatBitSize == 16 ? MVT::v8i16
8020 : MVT::v16i8;
8021 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
8022 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
8023 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
8024 }
8025 }
8026 }
8027
8028 // Scan through the operands to see if only one value is used.
8029 //
8030 // As an optimisation, even if more than one value is used it may be more
8031 // profitable to splat with one value and then change some lanes.
8032 //
8033 // Heuristically we decide to do this if the vector has a "dominant" value,
8034 // defined as splatted to more than half of the lanes.
8035 unsigned NumElts = VT.getVectorNumElements();
8036 bool isOnlyLowElement = true;
8037 bool usesOnlyOneValue = true;
8038 bool hasDominantValue = false;
8039 bool isConstant = true;
8040
8041 // Map of the number of times a particular SDValue appears in the
8042 // element list.
8043 DenseMap<SDValue, unsigned> ValueCounts;
8044 SDValue Value;
8045 for (unsigned i = 0; i < NumElts; ++i) {
8046 SDValue V = Op.getOperand(i);
8047 if (V.isUndef())
8048 continue;
8049 if (i > 0)
8050 isOnlyLowElement = false;
8051 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
8052 isConstant = false;
8053
8054 unsigned &Count = ValueCounts[V];
8055
8056 // Is this value dominant? (takes up more than half of the lanes)
8057 if (++Count > (NumElts / 2)) {
8058 hasDominantValue = true;
8059 Value = V;
8060 }
8061 }
8062 if (ValueCounts.size() != 1)
8063 usesOnlyOneValue = false;
8064 if (!Value.getNode() && !ValueCounts.empty())
8065 Value = ValueCounts.begin()->first;
8066
8067 if (ValueCounts.empty())
8068 return DAG.getUNDEF(VT);
8069
8070 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8071 // Keep going if we are hitting this case.
8072 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8073 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8074
8075 unsigned EltSize = VT.getScalarSizeInBits();
8076
8077 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8078 // i32 and try again.
8079 if (hasDominantValue && EltSize <= 32) {
8080 if (!isConstant) {
8081 SDValue N;
8082
8083 // If we are VDUPing a value that comes directly from a vector, that will
8084 // cause an unnecessary move to and from a GPR, where instead we could
8085 // just use VDUPLANE. We can only do this if the lane being extracted
8086 // is at a constant index, as the VDUP from lane instructions only have
8087 // constant-index forms.
8088 ConstantSDNode *constIndex;
8089 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8090 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8091 // We need to create a new undef vector to use for the VDUPLANE if the
8092 // size of the vector from which we get the value is different than the
8093 // size of the vector that we need to create. We will insert the element
8094 // such that the register coalescer will remove unnecessary copies.
8095 if (VT != Value->getOperand(0).getValueType()) {
8096 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8097 VT.getVectorNumElements();
8098 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8099 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8100 Value, DAG.getConstant(index, dl, MVT::i32)),
8101 DAG.getConstant(index, dl, MVT::i32));
8102 } else
8103 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8104 Value->getOperand(0), Value->getOperand(1));
8105 } else
8106 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8107
8108 if (!usesOnlyOneValue) {
8109 // The dominant value was splatted as 'N', but we now have to insert
8110 // all differing elements.
8111 for (unsigned I = 0; I < NumElts; ++I) {
8112 if (Op.getOperand(I) == Value)
8113 continue;
8114 SmallVector<SDValue, 3> Ops;
8115 Ops.push_back(N);
8116 Ops.push_back(Op.getOperand(I));
8117 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8118 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8119 }
8120 }
8121 return N;
8122 }
8123 if (VT.getVectorElementType().isFloatingPoint()) {
8124 SmallVector<SDValue, 8> Ops;
8125 MVT FVT = VT.getVectorElementType().getSimpleVT();
8126 assert(FVT == MVT::f32 || FVT == MVT::f16);
8127 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8128 for (unsigned i = 0; i < NumElts; ++i)
8129 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8130 Op.getOperand(i)));
8131 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8132 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8133 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8134 if (Val.getNode())
8135 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8136 }
8137 if (usesOnlyOneValue) {
8138 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8139 if (isConstant && Val.getNode())
8140 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8141 }
8142 }
8143
8144 // If all elements are constants and the case above didn't get hit, fall back
8145 // to the default expansion, which will generate a load from the constant
8146 // pool.
8147 if (isConstant)
8148 return SDValue();
8149
8150 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8151 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8152 // length <= 2.
8153 if (NumElts >= 4)
8154 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8155 return shuffle;
8156
8157 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8158 // VCVT's
8159 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8160 return VCVT;
8161 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8162 return VCVT;
8163
8164 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8165 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8166 // into two 64-bit vectors; we might discover a better way to lower it.
8167 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8168 EVT ExtVT = VT.getVectorElementType();
8169 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8170 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8171 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8172 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8173 SDValue Upper =
8174 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8175 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8176 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8177 if (Lower && Upper)
8178 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8179 }
8180
8181 // Vectors with 32- or 64-bit elements can be built by directly assigning
8182 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8183 // will be legalized.
8184 if (EltSize >= 32) {
8185 // Do the expansion with floating-point types, since that is what the VFP
8186 // registers are defined to use, and since i64 is not legal.
8187 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8188 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8189 SmallVector<SDValue, 8> Ops;
8190 for (unsigned i = 0; i < NumElts; ++i)
8191 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8192 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8193 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8194 }
8195
8196 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8197 // know the default expansion would otherwise fall back on something even
8198 // worse. For a vector with one or two non-undef values, that's
8199 // scalar_to_vector for the elements followed by a shuffle (provided the
8200 // shuffle is valid for the target) and materialization element by element
8201 // on the stack followed by a load for everything else.
8202 if (!isConstant && !usesOnlyOneValue) {
8203 SDValue Vec = DAG.getUNDEF(VT);
8204 for (unsigned i = 0 ; i < NumElts; ++i) {
8205 SDValue V = Op.getOperand(i);
8206 if (V.isUndef())
8207 continue;
8208 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8209 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8210 }
8211 return Vec;
8212 }
8213
8214 return SDValue();
8215}
8216
8217// Gather data to see if the operation can be modelled as a
8218// shuffle in combination with VEXTs.
8219SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8220 SelectionDAG &DAG) const {
8221 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8222 SDLoc dl(Op);
8223 EVT VT = Op.getValueType();
8224 unsigned NumElts = VT.getVectorNumElements();
8225
8226 struct ShuffleSourceInfo {
8227 SDValue Vec;
8228 unsigned MinElt = std::numeric_limits<unsigned>::max();
8229 unsigned MaxElt = 0;
8230
8231 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8232 // be compatible with the shuffle we intend to construct. As a result
8233 // ShuffleVec will be some sliding window into the original Vec.
8234 SDValue ShuffleVec;
8235
8236 // Code should guarantee that element i in Vec starts at element "WindowBase
8237 // + i * WindowScale in ShuffleVec".
8238 int WindowBase = 0;
8239 int WindowScale = 1;
8240
8241 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8242
8243 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8244 };
8245
8246 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8247 // node.
8248 SmallVector<ShuffleSourceInfo, 2> Sources;
8249 for (unsigned i = 0; i < NumElts; ++i) {
8250 SDValue V = Op.getOperand(i);
8251 if (V.isUndef())
8252 continue;
8253 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8254 // A shuffle can only come from building a vector from various
8255 // elements of other vectors.
8256 return SDValue();
8257 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8258 // Furthermore, shuffles require a constant mask, whereas extractelts
8259 // accept variable indices.
8260 return SDValue();
8261 }
8262
8263 // Add this element source to the list if it's not already there.
8264 SDValue SourceVec = V.getOperand(0);
8265 auto Source = llvm::find(Sources, SourceVec);
8266 if (Source == Sources.end())
8267 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8268
8269 // Update the minimum and maximum lane number seen.
8270 unsigned EltNo = V.getConstantOperandVal(1);
8271 Source->MinElt = std::min(Source->MinElt, EltNo);
8272 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8273 }
8274
8275 // Currently only do something sane when at most two source vectors
8276 // are involved.
8277 if (Sources.size() > 2)
8278 return SDValue();
8279
8280 // Find out the smallest element size among result and two sources, and use
8281 // it as element size to build the shuffle_vector.
8282 EVT SmallestEltTy = VT.getVectorElementType();
8283 for (auto &Source : Sources) {
8284 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8285 if (SrcEltTy.bitsLT(SmallestEltTy))
8286 SmallestEltTy = SrcEltTy;
8287 }
8288 unsigned ResMultiplier =
8289 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8290 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8291 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8292
8293 // If the source vector is too wide or too narrow, we may nevertheless be able
8294 // to construct a compatible shuffle either by concatenating it with UNDEF or
8295 // extracting a suitable range of elements.
8296 for (auto &Src : Sources) {
8297 EVT SrcVT = Src.ShuffleVec.getValueType();
8298
8299 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8300 uint64_t VTSize = VT.getFixedSizeInBits();
8301 if (SrcVTSize == VTSize)
8302 continue;
8303
8304 // This stage of the search produces a source with the same element type as
8305 // the original, but with a total width matching the BUILD_VECTOR output.
8306 EVT EltVT = SrcVT.getVectorElementType();
8307 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8308 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8309
8310 if (SrcVTSize < VTSize) {
8311 if (2 * SrcVTSize != VTSize)
8312 return SDValue();
8313 // We can pad out the smaller vector for free, so if it's part of a
8314 // shuffle...
8315 Src.ShuffleVec =
8316 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8317 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8318 continue;
8319 }
8320
8321 if (SrcVTSize != 2 * VTSize)
8322 return SDValue();
8323
8324 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8325 // Span too large for a VEXT to cope
8326 return SDValue();
8327 }
8328
8329 if (Src.MinElt >= NumSrcElts) {
8330 // The extraction can just take the second half
8331 Src.ShuffleVec =
8332 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8333 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8334 Src.WindowBase = -NumSrcElts;
8335 } else if (Src.MaxElt < NumSrcElts) {
8336 // The extraction can just take the first half
8337 Src.ShuffleVec =
8338 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8339 DAG.getConstant(0, dl, MVT::i32));
8340 } else {
8341 // An actual VEXT is needed
8342 SDValue VEXTSrc1 =
8343 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8344 DAG.getConstant(0, dl, MVT::i32));
8345 SDValue VEXTSrc2 =
8346 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8347 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8348
8349 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8350 VEXTSrc2,
8351 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8352 Src.WindowBase = -Src.MinElt;
8353 }
8354 }
8355
8356 // Another possible incompatibility occurs from the vector element types. We
8357 // can fix this by bitcasting the source vectors to the same type we intend
8358 // for the shuffle.
8359 for (auto &Src : Sources) {
8360 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8361 if (SrcEltTy == SmallestEltTy)
8362 continue;
8363 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8364 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8365 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8366 Src.WindowBase *= Src.WindowScale;
8367 }
8368
8369 // Final check before we try to actually produce a shuffle.
8370 LLVM_DEBUG({
8371 for (auto Src : Sources)
8372 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
8373 });
8374
8375 // The stars all align, our next step is to produce the mask for the shuffle.
8376 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8377 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8378 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8379 SDValue Entry = Op.getOperand(i);
8380 if (Entry.isUndef())
8381 continue;
8382
8383 auto Src = llvm::find(Sources, Entry.getOperand(0));
8384 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8385
8386 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8387 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8388 // segment.
8389 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8390 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8391 VT.getScalarSizeInBits());
8392 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8393
8394 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8395 // starting at the appropriate offset.
8396 int *LaneMask = &Mask[i * ResMultiplier];
8397
8398 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8399 ExtractBase += NumElts * (Src - Sources.begin());
8400 for (int j = 0; j < LanesDefined; ++j)
8401 LaneMask[j] = ExtractBase + j;
8402 }
8403
8404
8405 // We can't handle more than two sources. This should have already
8406 // been checked before this point.
8407 assert(Sources.size() <= 2 && "Too many sources!");
8408
8409 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8410 for (unsigned i = 0; i < Sources.size(); ++i)
8411 ShuffleOps[i] = Sources[i].ShuffleVec;
8412
8413 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8414 ShuffleOps[1], Mask, DAG);
8415 if (!Shuffle)
8416 return SDValue();
8417 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8418}
8419
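// Opcodes for entries in the perfect shuffle table: each PFEntry packs a cost
// in bits 31-30, one of these opcodes in bits 29-26, and the table indices of
// its two operand shuffles in bits 25-13 and 12-0.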
8420 enum ShuffleOpCodes {
8421 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8422 OP_VREV,
8423 OP_VDUP0,
8424 OP_VDUP1,
8425 OP_VDUP2,
8426 OP_VDUP3,
8427 OP_VEXT1,
8428 OP_VEXT2,
8429 OP_VEXT3,
8430 OP_VUZPL, // VUZP, left result
8431 OP_VUZPR, // VUZP, right result
8432 OP_VZIPL, // VZIP, left result
8433 OP_VZIPR, // VZIP, right result
8434 OP_VTRNL, // VTRN, left result
8435 OP_VTRNR // VTRN, right result
8436 };
8437
8438static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8439 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8440 switch (OpNum) {
8441 case OP_COPY:
8442 case OP_VREV:
8443 case OP_VDUP0:
8444 case OP_VDUP1:
8445 case OP_VDUP2:
8446 case OP_VDUP3:
8447 return true;
8448 }
8449 return false;
8450}
8451
8452/// isShuffleMaskLegal - Targets can use this to indicate that they only
8453/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8454/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8455/// are assumed to be legal.
8456 bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8457 if (VT.getVectorNumElements() == 4 &&
8458 (VT.is128BitVector() || VT.is64BitVector())) {
8459 unsigned PFIndexes[4];
8460 for (unsigned i = 0; i != 4; ++i) {
8461 if (M[i] < 0)
8462 PFIndexes[i] = 8;
8463 else
8464 PFIndexes[i] = M[i];
8465 }
8466
8467 // Compute the index in the perfect shuffle table.
8468 unsigned PFTableIndex =
8469 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8470 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8471 unsigned Cost = (PFEntry >> 30);
8472
8473 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8474 return true;
8475 }
8476
8477 bool ReverseVEXT, isV_UNDEF;
8478 unsigned Imm, WhichResult;
8479
8480 unsigned EltSize = VT.getScalarSizeInBits();
8481 if (EltSize >= 32 ||
8483 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8484 isVREVMask(M, VT, 64) ||
8485 isVREVMask(M, VT, 32) ||
8486 isVREVMask(M, VT, 16))
8487 return true;
8488 else if (Subtarget->hasNEON() &&
8489 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8490 isVTBLMask(M, VT) ||
8491 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8492 return true;
8493 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8494 isReverseMask(M, VT))
8495 return true;
8496 else if (Subtarget->hasMVEIntegerOps() &&
8497 (isVMOVNMask(M, VT, true, false) ||
8498 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8499 return true;
8500 else if (Subtarget->hasMVEIntegerOps() &&
8501 (isTruncMask(M, VT, false, false) ||
8502 isTruncMask(M, VT, false, true) ||
8503 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8504 return true;
8505 else
8506 return false;
8507}
8508
8509/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8510/// the specified operations to build the shuffle.
8511static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8512 SDValue RHS, SelectionDAG &DAG,
8513 const SDLoc &dl) {
8514 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8515 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8516 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8517
8518 if (OpNum == OP_COPY) {
8519 if (LHSID == (1*9+2)*9+3) return LHS;
8520 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8521 return RHS;
8522 }
8523
8524 SDValue OpLHS, OpRHS;
8525 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8526 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8527 EVT VT = OpLHS.getValueType();
8528
8529 switch (OpNum) {
8530 default: llvm_unreachable("Unknown shuffle opcode!");
8531 case OP_VREV:
8532 // VREV divides the vector in half and swaps within the half.
8533 if (VT.getScalarSizeInBits() == 32)
8534 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8535 // vrev <4 x i16> -> VREV32
8536 if (VT.getScalarSizeInBits() == 16)
8537 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8538 // vrev <4 x i8> -> VREV16
8539 assert(VT.getScalarSizeInBits() == 8);
8540 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8541 case OP_VDUP0:
8542 case OP_VDUP1:
8543 case OP_VDUP2:
8544 case OP_VDUP3:
8545 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8546 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8547 case OP_VEXT1:
8548 case OP_VEXT2:
8549 case OP_VEXT3:
8550 return DAG.getNode(ARMISD::VEXT, dl, VT,
8551 OpLHS, OpRHS,
8552 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8553 case OP_VUZPL:
8554 case OP_VUZPR:
8555 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8556 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8557 case OP_VZIPL:
8558 case OP_VZIPR:
8559 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8560 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8561 case OP_VTRNL:
8562 case OP_VTRNR:
8563 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8564 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8565 }
8566}
8567
8569 ArrayRef<int> ShuffleMask,
8570 SelectionDAG &DAG) {
8571 // Check to see if we can use the VTBL instruction.
8572 SDValue V1 = Op.getOperand(0);
8573 SDValue V2 = Op.getOperand(1);
8574 SDLoc DL(Op);
8575
8576 SmallVector<SDValue, 8> VTBLMask;
8577 for (int I : ShuffleMask)
8578 VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32));
8579
8580 if (V2.getNode()->isUndef())
8581 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8582 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8583
8584 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8585 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8586}
8587
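/// LowerReverse_VECTOR_SHUFFLE - Lower a full vector reversal as a VREV64 on
/// the input followed by a shuffle that swaps the two double words.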
8588 static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8589 SDLoc DL(Op);
8590 EVT VT = Op.getValueType();
8591
8592 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8593 "Expect an v8i16/v16i8 type");
8594 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8595 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8596 // extract the first 8 bytes into the top double word and the last 8 bytes
8597 // into the bottom double word, through a new vector shuffle that will be
8598 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8599 std::vector<int> NewMask;
8600 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8601 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8602 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8603 NewMask.push_back(i);
8604 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8605}
8606
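/// Map a boolean predicate type (v2i1/v4i1/v8i1/v16i1) onto the 128-bit
/// vector type with the same number of lanes.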
8607 static EVT getVectorTyFromPredicateVector(EVT VT) {
8608 switch (VT.getSimpleVT().SimpleTy) {
8609 case MVT::v2i1:
8610 return MVT::v2f64;
8611 case MVT::v4i1:
8612 return MVT::v4i32;
8613 case MVT::v8i1:
8614 return MVT::v8i16;
8615 case MVT::v16i1:
8616 return MVT::v16i8;
8617 default:
8618 llvm_unreachable("Unexpected vector predicate type");
8619 }
8620}
8621
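/// PromoteMVEPredVector - Promote an MVE predicate to an ordinary integer
/// vector by selecting between all-ones and all-zeroes lanes, so that it can
/// be used by normal vector operations such as shuffles.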
8622 static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8623 SelectionDAG &DAG) {
8624 // Converting from boolean predicates to integers involves creating a vector
8625 // of all ones or all zeroes and selecting the lanes based upon the real
8626 // predicate.
8627 SDValue AllOnes =
8628 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8629 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8630
8631 SDValue AllZeroes =
8632 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8633 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8634
8635 // Get full vector type from predicate type
8636 EVT NewVT = getVectorTyFromPredicateVector(VT);
8637
8638 SDValue RecastV1;
8639 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8640 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8641 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8642 // since we know in hardware the sizes are really the same.
8643 if (VT != MVT::v16i1)
8644 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8645 else
8646 RecastV1 = Pred;
8647
8648 // Select either all ones or zeroes depending upon the real predicate bits.
8649 SDValue PredAsVector =
8650 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8651
8652 // Recast our new predicate-as-integer v16i8 vector into something
8653 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8654 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8655}
8656
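/// LowerVECTOR_SHUFFLE_i1 - Lower a shuffle of MVE predicate vectors by
/// promoting the predicates to integer vectors, shuffling those, and turning
/// the result back into a predicate with a compare against zero.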
8657 static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8658 const ARMSubtarget *ST) {
8659 EVT VT = Op.getValueType();
8660 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8661 ArrayRef<int> ShuffleMask = SVN->getMask();
8662
8663 assert(ST->hasMVEIntegerOps() &&
8664 "No support for vector shuffle of boolean predicates");
8665
8666 SDValue V1 = Op.getOperand(0);
8667 SDValue V2 = Op.getOperand(1);
8668 SDLoc dl(Op);
8669 if (isReverseMask(ShuffleMask, VT)) {
8670 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8671 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8672 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8673 DAG.getConstant(16, dl, MVT::i32));
8674 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8675 }
8676
8677 // Until we can come up with optimised cases for every single vector
8678 // shuffle in existence we have chosen the least painful strategy. This is
8679 // to essentially promote the boolean predicate to an 8-bit integer, where
8680 // each predicate represents a byte. Then we fall back on a normal integer
8681 // vector shuffle and convert the result back into a predicate vector. In
8682 // many cases the generated code might be even better than scalar code
8683 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8684 // fields in a register into 8 other arbitrary 2-bit fields!
8685 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8686 EVT NewVT = PredAsVector1.getValueType();
8687 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8688 : PromoteMVEPredVector(dl, V2, VT, DAG);
8689 assert(PredAsVector2.getValueType() == NewVT &&
8690 "Expected identical vector type in expanded i1 shuffle!");
8691
8692 // Do the shuffle!
8693 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8694 PredAsVector2, ShuffleMask);
8695
8696 // Now return the result of comparing the shuffled vector with zero,
8697 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8698 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8699 if (VT == MVT::v2i1) {
8700 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8701 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8702 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8703 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8704 }
8705 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8706 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8707}
8708
8709 static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8710 ArrayRef<int> ShuffleMask,
8711 SelectionDAG &DAG) {
8712 // Attempt to lower the vector shuffle using as many whole register movs as
8713 // possible. This is useful for types smaller than 32 bits, which would
8714 // often otherwise become a series of GPR movs.
8715 SDLoc dl(Op);
8716 EVT VT = Op.getValueType();
8717 if (VT.getScalarSizeInBits() >= 32)
8718 return SDValue();
8719
8720 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8721 "Unexpected vector type");
8722 int NumElts = VT.getVectorNumElements();
8723 int QuarterSize = NumElts / 4;
8724 // The four final parts of the vector, as i32's
8725 SDValue Parts[4];
8726
8727 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc. (but not
8728 // <u,u,u,u>), returning the vmov lane index.
8729 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8730 // Detect which mov lane this would be from the first non-undef element.
8731 int MovIdx = -1;
8732 for (int i = 0; i < Length; i++) {
8733 if (ShuffleMask[Start + i] >= 0) {
8734 if (ShuffleMask[Start + i] % Length != i)
8735 return -1;
8736 MovIdx = ShuffleMask[Start + i] / Length;
8737 break;
8738 }
8739 }
8740 // If all items are undef, leave this for other combines
8741 if (MovIdx == -1)
8742 return -1;
8743 // Check the remaining values are the correct part of the same mov
8744 for (int i = 1; i < Length; i++) {
8745 if (ShuffleMask[Start + i] >= 0 &&
8746 (ShuffleMask[Start + i] / Length != MovIdx ||
8747 ShuffleMask[Start + i] % Length != i))
8748 return -1;
8749 }
8750 return MovIdx;
8751 };
8752
8753 for (int Part = 0; Part < 4; ++Part) {
8754 // Does this part look like a mov
8755 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8756 if (Elt != -1) {
8757 SDValue Input = Op->getOperand(0);
8758 if (Elt >= 4) {
8759 Input = Op->getOperand(1);
8760 Elt -= 4;
8761 }
8762 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8763 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8764 DAG.getConstant(Elt, dl, MVT::i32));
8765 }
8766 }
8767
8768 // Nothing interesting found, just return
8769 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8770 return SDValue();
8771
8772 // The other parts need to be built with the old shuffle vector, cast to a
8773 // v4i32 and extract_vector_elts
8774 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8775 SmallVector<int, 16> NewShuffleMask;
8776 for (int Part = 0; Part < 4; ++Part)
8777 for (int i = 0; i < QuarterSize; i++)
8778 NewShuffleMask.push_back(
8779 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8780 SDValue NewShuffle = DAG.getVectorShuffle(
8781 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8782 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8783
8784 for (int Part = 0; Part < 4; ++Part)
8785 if (!Parts[Part])
8786 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8787 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8788 }
8789 // Build a vector out of the various parts and bitcast it back to the original
8790 // type.
8791 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8792 return DAG.getBitcast(VT, NewVec);
8793}
8794
8795 static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8796 ArrayRef<int> ShuffleMask,
8797 SelectionDAG &DAG) {
8798 SDValue V1 = Op.getOperand(0);
8799 SDValue V2 = Op.getOperand(1);
8800 EVT VT = Op.getValueType();
8801 unsigned NumElts = VT.getVectorNumElements();
8802
8803 // A one-off identity mask is one that is mostly an identity mask from a
8804 // single source but contains a single element out-of-place, either from a
8805 // different vector or from another position in the same vector. Instead of
8806 // lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8807 // pair directly.
8808 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8809 int &OffElement) {
8810 OffElement = -1;
8811 int NonUndef = 0;
8812 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8813 if (Mask[i] == -1)
8814 continue;
8815 NonUndef++;
8816 if (Mask[i] != i + BaseOffset) {
8817 if (OffElement == -1)
8818 OffElement = i;
8819 else
8820 return false;
8821 }
8822 }
8823 return NonUndef > 2 && OffElement != -1;
8824 };
8825 int OffElement;
8826 SDValue VInput;
8827 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8828 VInput = V1;
8829 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8830 VInput = V2;
8831 else
8832 return SDValue();
8833
8834 SDLoc dl(Op);
8835 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8836 ? MVT::i32
8837 : VT.getScalarType();
8838 SDValue Elt = DAG.getNode(
8839 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8840 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8841 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8842 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8843 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8844}
8845
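/// LowerVECTOR_SHUFFLE - Main VECTOR_SHUFFLE lowering. Tries NEON/MVE
/// specific nodes (VDUP, VEXT, VREV, VZIP/VUZP/VTRN, VMOVN, MVETRUNC, VTBL,
/// whole-register movs) before falling back to the perfect shuffle table or
/// an element-by-element ARMISD::BUILD_VECTOR expansion.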
8846 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8847 const ARMSubtarget *ST) {
8848 SDValue V1 = Op.getOperand(0);
8849 SDValue V2 = Op.getOperand(1);
8850 SDLoc dl(Op);
8851 EVT VT = Op.getValueType();
8852 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8853 unsigned EltSize = VT.getScalarSizeInBits();
8854
8855 if (ST->hasMVEIntegerOps() && EltSize == 1)
8856 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8857
8858 // Convert shuffles that are directly supported on NEON to target-specific
8859 // DAG nodes, instead of keeping them as shuffles and matching them again
8860 // during code selection. This is more efficient and avoids the possibility
8861 // of inconsistencies between legalization and selection.
8862 // FIXME: floating-point vectors should be canonicalized to integer vectors
8863 // of the same type so that they get CSEd properly.
8864 ArrayRef<int> ShuffleMask = SVN->getMask();
8865
8866 if (EltSize <= 32) {
8867 if (SVN->isSplat()) {
8868 int Lane = SVN->getSplatIndex();
8869 // If this is undef splat, generate it via "just" vdup, if possible.
8870 if (Lane == -1) Lane = 0;
8871
8872 // Test if V1 is a SCALAR_TO_VECTOR.
8873 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8874 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8875 }
8876 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8877 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8878 // reaches it).
8879 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8880 !isa<ConstantSDNode>(V1.getOperand(0))) {
8881 bool IsScalarToVector = true;
8882 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8883 if (!V1.getOperand(i).isUndef()) {
8884 IsScalarToVector = false;
8885 break;
8886 }
8887 if (IsScalarToVector)
8888 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8889 }
8890 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8891 DAG.getConstant(Lane, dl, MVT::i32));
8892 }
8893
8894 bool ReverseVEXT = false;
8895 unsigned Imm = 0;
8896 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8897 if (ReverseVEXT)
8898 std::swap(V1, V2);
8899 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8900 DAG.getConstant(Imm, dl, MVT::i32));
8901 }
8902
8903 if (isVREVMask(ShuffleMask, VT, 64))
8904 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8905 if (isVREVMask(ShuffleMask, VT, 32))
8906 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8907 if (isVREVMask(ShuffleMask, VT, 16))
8908 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8909
8910 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8911 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8912 DAG.getConstant(Imm, dl, MVT::i32));
8913 }
8914
8915 // Check for Neon shuffles that modify both input vectors in place.
8916 // If both results are used, i.e., if there are two shuffles with the same
8917 // source operands and with masks corresponding to both results of one of
8918 // these operations, DAG memoization will ensure that a single node is
8919 // used for both shuffles.
8920 unsigned WhichResult = 0;
8921 bool isV_UNDEF = false;
8922 if (ST->hasNEON()) {
8923 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8924 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8925 if (isV_UNDEF)
8926 V2 = V1;
8927 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8928 .getValue(WhichResult);
8929 }
8930 }
8931 if (ST->hasMVEIntegerOps()) {
8932 if (isVMOVNMask(ShuffleMask, VT, false, false))
8933 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8934 DAG.getConstant(0, dl, MVT::i32));
8935 if (isVMOVNMask(ShuffleMask, VT, true, false))
8936 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8937 DAG.getConstant(1, dl, MVT::i32));
8938 if (isVMOVNMask(ShuffleMask, VT, true, true))
8939 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8940 DAG.getConstant(1, dl, MVT::i32));
8941 }
8942
8943 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8944 // shuffles that produce a result larger than their operands with:
8945 // shuffle(concat(v1, undef), concat(v2, undef))
8946 // ->
8947 // shuffle(concat(v1, v2), undef)
8948 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8949 //
8950 // This is useful in the general case, but there are special cases where
8951 // native shuffles produce larger results: the two-result ops.
8952 //
8953 // Look through the concat when lowering them:
8954 // shuffle(concat(v1, v2), undef)
8955 // ->
8956 // concat(VZIP(v1, v2):0, :1)
8957 //
8958 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8959 SDValue SubV1 = V1->getOperand(0);
8960 SDValue SubV2 = V1->getOperand(1);
8961 EVT SubVT = SubV1.getValueType();
8962
8963 // We expect these to have been canonicalized to -1.
8964 assert(llvm::all_of(ShuffleMask, [&](int i) {
8965 return i < (int)VT.getVectorNumElements();
8966 }) && "Unexpected shuffle index into UNDEF operand!");
8967
8968 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8969 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8970 if (isV_UNDEF)
8971 SubV2 = SubV1;
8972 assert((WhichResult == 0) &&
8973 "In-place shuffle of concat can only have one result!");
8974 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8975 SubV1, SubV2);
8976 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8977 Res.getValue(1));
8978 }
8979 }
8980 }
8981
8982 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8983 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8984 return V;
8985
8986 for (bool Top : {false, true}) {
8987 for (bool SingleSource : {false, true}) {
8988 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8989 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8990 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8991 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8992 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8993 SingleSource ? V1 : V2);
8994 if (Top) {
8995 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8996 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8997 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8998 }
8999 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
9000 }
9001 }
9002 }
9003 }
9004
9005 // If the shuffle is not directly supported and it has 4 elements, use
9006 // the PerfectShuffle-generated table to synthesize it from other shuffles.
9007 unsigned NumElts = VT.getVectorNumElements();
9008 if (NumElts == 4) {
9009 unsigned PFIndexes[4];
9010 for (unsigned i = 0; i != 4; ++i) {
9011 if (ShuffleMask[i] < 0)
9012 PFIndexes[i] = 8;
9013 else
9014 PFIndexes[i] = ShuffleMask[i];
9015 }
9016
9017 // Compute the index in the perfect shuffle table.
9018 unsigned PFTableIndex =
9019 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9020 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9021 unsigned Cost = (PFEntry >> 30);
9022
9023 if (Cost <= 4) {
9024 if (ST->hasNEON())
9025 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9026 else if (isLegalMVEShuffleOp(PFEntry)) {
9027 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9028 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9029 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
9030 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
9031 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
9032 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9033 }
9034 }
9035 }
9036
9037 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
9038 if (EltSize >= 32) {
9039 // Do the expansion with floating-point types, since that is what the VFP
9040 // registers are defined to use, and since i64 is not legal.
9041 EVT EltVT = EVT::getFloatingPointVT(EltSize);
9042 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
9043 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
9044 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
9045 SmallVector<SDValue, 8> Ops;
9046 for (unsigned i = 0; i < NumElts; ++i) {
9047 if (ShuffleMask[i] < 0)
9048 Ops.push_back(DAG.getUNDEF(EltVT));
9049 else
9050 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
9051 ShuffleMask[i] < (int)NumElts ? V1 : V2,
9052 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
9053 dl, MVT::i32)));
9054 }
9055 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
9056 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
9057 }
9058
9059 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9060 isReverseMask(ShuffleMask, VT))
9061 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9062
9063 if (ST->hasNEON() && VT == MVT::v8i8)
9064 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9065 return NewOp;
9066
9067 if (ST->hasMVEIntegerOps())
9068 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9069 return NewOp;
9070
9071 return SDValue();
9072}
9073
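/// LowerINSERT_VECTOR_ELT_i1 - Insert a single i1 lane into an MVE predicate
/// by moving the predicate into a GPR via PREDICATE_CAST and BFI'ing the
/// sign-extended element into the lane's bitfield.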
9074 static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9075 const ARMSubtarget *ST) {
9076 EVT VecVT = Op.getOperand(0).getValueType();
9077 SDLoc dl(Op);
9078
9079 assert(ST->hasMVEIntegerOps() &&
9080 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9081
9082 SDValue Conv =
9083 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9084 unsigned Lane = Op.getConstantOperandVal(2);
9085 unsigned LaneWidth =
9086 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9087 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9088 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9089 Op.getOperand(1), DAG.getValueType(MVT::i1));
9090 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9091 DAG.getConstant(~Mask, dl, MVT::i32));
9092 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9093}
9094
9095SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9096 SelectionDAG &DAG) const {
9097 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9098 SDValue Lane = Op.getOperand(2);
9099 if (!isa<ConstantSDNode>(Lane))
9100 return SDValue();
9101
9102 SDValue Elt = Op.getOperand(1);
9103 EVT EltVT = Elt.getValueType();
9104
9105 if (Subtarget->hasMVEIntegerOps() &&
9106 Op.getValueType().getScalarSizeInBits() == 1)
9107 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9108
9109 if (getTypeAction(*DAG.getContext(), EltVT) ==
9110 TargetLowering::TypePromoteFloat) {
9111 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9112 // but the type system will try to do that if we don't intervene.
9113 // Reinterpret any such vector-element insertion as one with the
9114 // corresponding integer types.
9115
9116 SDLoc dl(Op);
9117
9118 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9119 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9120 TargetLowering::TypePromoteFloat);
9121
9122 SDValue VecIn = Op.getOperand(0);
9123 EVT VecVT = VecIn.getValueType();
9124 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9125 VecVT.getVectorNumElements());
9126
9127 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9128 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9129 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9130 IVecIn, IElt, Lane);
9131 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9132 }
9133
9134 return Op;
9135}
9136
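/// LowerEXTRACT_VECTOR_ELT_i1 - Extract a single i1 lane from an MVE
/// predicate by moving the predicate into a GPR via PREDICATE_CAST and
/// shifting the requested lane down to bit 0.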
9137 static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9138 const ARMSubtarget *ST) {
9139 EVT VecVT = Op.getOperand(0).getValueType();
9140 SDLoc dl(Op);
9141
9142 assert(ST->hasMVEIntegerOps() &&
9143 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9144
9145 SDValue Conv =
9146 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9147 unsigned Lane = Op.getConstantOperandVal(1);
9148 unsigned LaneWidth =
9149 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9150 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9151 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9152 return Shift;
9153}
9154
9155 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9156 const ARMSubtarget *ST) {
9157 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9158 SDValue Lane = Op.getOperand(1);
9159 if (!isa<ConstantSDNode>(Lane))
9160 return SDValue();
9161
9162 SDValue Vec = Op.getOperand(0);
9163 EVT VT = Vec.getValueType();
9164
9165 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9166 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9167
9168 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9169 SDLoc dl(Op);
9170 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9171 }
9172
9173 return Op;
9174}
9175
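/// LowerCONCAT_VECTORS_i1 - Concatenate MVE predicate vectors by promoting
/// each operand to an integer vector, packing the lanes together (via
/// MVETRUNC or per-element extract/insert) and comparing against zero to
/// form the wider predicate.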
9176 static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9177 const ARMSubtarget *ST) {
9178 SDLoc dl(Op);
9179 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9180 "Unexpected custom CONCAT_VECTORS lowering");
9182 "Unexpected custom CONCAT_VECTORS lowering");
9183 assert(ST->hasMVEIntegerOps() &&
9184 "CONCAT_VECTORS lowering only supported for MVE");
9185
9186 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9187 EVT Op1VT = V1.getValueType();
9188 EVT Op2VT = V2.getValueType();
9189 assert(Op1VT == Op2VT && "Operand types don't match!");
9190 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9191 "Unexpected i1 concat operations!");
9192 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9193
9194 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9195 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9196
9197 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9198 // promoted to v8i16, etc.
9199 MVT ElType =
9200 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9201 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9202
9203 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9204 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9205 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9206 // ConcatVT.
9207 SDValue ConVec =
9208 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9209 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9210 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9211 }
9212
9213 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9214 // to be the right size for the destination. For example, if Op1 is v4i1
9215 // then the promoted vector is v4i32. The result of concatenation gives a
9216 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9217 // needs truncating to i16 and inserting in the result.
9218 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9219 EVT NewVT = NewV.getValueType();
9220 EVT ConcatVT = ConVec.getValueType();
9221 unsigned ExtScale = 1;
9222 if (NewVT == MVT::v2f64) {
9223 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9224 ExtScale = 2;
9225 }
9226 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9227 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9228 DAG.getIntPtrConstant(i * ExtScale, dl));
9229 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9230 DAG.getConstant(j, dl, MVT::i32));
9231 }
9232 return ConVec;
9233 };
9234 unsigned j = 0;
9235 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9236 ConVec = ExtractInto(NewV1, ConVec, j);
9237 ConVec = ExtractInto(NewV2, ConVec, j);
9238
9239 // Now return the result of comparing the subvector with zero, which will
9240 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9241 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9242 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9243 };
9244
9245 // Concat each pair of subvectors and pack into the lower half of the array.
9246 SmallVector<SDValue> ConcatOps(Op->ops());
9247 while (ConcatOps.size() > 1) {
9248 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9249 SDValue V1 = ConcatOps[I];
9250 SDValue V2 = ConcatOps[I + 1];
9251 ConcatOps[I / 2] = ConcatPair(V1, V2);
9252 }
9253 ConcatOps.resize(ConcatOps.size() / 2);
9254 }
9255 return ConcatOps[0];
9256}
9257
9258 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9259 const ARMSubtarget *ST) {
9260 EVT VT = Op->getValueType(0);
9261 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9262 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9263
9264 // The only time a CONCAT_VECTORS operation can have legal types is when
9265 // two 64-bit vectors are concatenated to a 128-bit vector.
9266 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9267 "unexpected CONCAT_VECTORS");
9268 SDLoc dl(Op);
9269 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9270 SDValue Op0 = Op.getOperand(0);
9271 SDValue Op1 = Op.getOperand(1);
9272 if (!Op0.isUndef())
9273 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9274 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9275 DAG.getIntPtrConstant(0, dl));
9276 if (!Op1.isUndef())
9277 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9278 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9279 DAG.getIntPtrConstant(1, dl));
9280 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9281}
9282
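/// LowerEXTRACT_SUBVECTOR - Extract a predicate subvector by promoting the
/// source predicate to an integer vector, copying out the selected lanes and
/// comparing the result against zero to form the narrower predicate.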
9283 static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9284 const ARMSubtarget *ST) {
9285 SDValue V1 = Op.getOperand(0);
9286 SDValue V2 = Op.getOperand(1);
9287 SDLoc dl(Op);
9288 EVT VT = Op.getValueType();
9289 EVT Op1VT = V1.getValueType();
9290 unsigned NumElts = VT.getVectorNumElements();
9291 unsigned Index = V2->getAsZExtVal();
9292
9293 assert(VT.getScalarSizeInBits() == 1 &&
9294 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9295 assert(ST->hasMVEIntegerOps() &&
9296 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9297
9298 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9299
9300 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9301 // promoted to v8i16, etc.
9302
9303 MVT ElType =
9304 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9305 if (NumElts == 2) {
9306 EVT SubVT = MVT::v4i32;
9307 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9308 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9309 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9310 DAG.getIntPtrConstant(i, dl));
9311 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9312 DAG.getConstant(j, dl, MVT::i32));
9313 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9314 DAG.getConstant(j + 1, dl, MVT::i32));
9315 }
9316 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9317 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9318 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9319 }
9320
9321 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9322 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9323 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9324 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9325 DAG.getIntPtrConstant(i, dl));
9326 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9327 DAG.getConstant(j, dl, MVT::i32));
9328 }
9329
9330 // Now return the result of comparing the subvector with zero,
9331 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9332 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9333 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9334}
9335
9336// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9337 static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9338 const ARMSubtarget *ST) {
9339 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9340 EVT VT = N->getValueType(0);
9341 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9342 "Expected a vector i1 type!");
9343 SDValue Op = N->getOperand(0);
9344 EVT FromVT = Op.getValueType();
9345 SDLoc DL(N);
9346
9347 SDValue And =
9348 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9349 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9350 DAG.getCondCode(ISD::SETNE));
9351}
9352
9353 static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9354 const ARMSubtarget *Subtarget) {
9355 if (!Subtarget->hasMVEIntegerOps())
9356 return SDValue();
9357
9358 EVT ToVT = N->getValueType(0);
9359 if (ToVT.getScalarType() == MVT::i1)
9360 return LowerTruncatei1(N, DAG, Subtarget);
9361
9362 // MVE does not have a single instruction to perform the truncation of a v4i32
9363 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9364 // Most of the instructions in MVE follow the 'Beats' system, where moving
9365 // values from different lanes is usually something that the instructions
9366 // avoid.
9367 //
9368 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9369 // which take the top/bottom half of a larger lane and extend it (or do the
9370 // opposite, truncating into the top/bottom lane from a larger lane). Note
9371 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9372 // bottom 16 bits from each vector lane. This works really well with T/B
9373 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9374 // to be reordered.
9375 //
9376 // But truncates and sext/zext are always going to be fairly common from llvm.
9377 // We have several options for how to deal with them:
9378 // - Wherever possible combine them into an instruction that makes them
9379 // "free". This includes loads/stores, which can perform the trunc as part
9380 // of the memory operation. Or certain shuffles that can be turned into
9381 // VMOVN/VMOVL.
9382 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9383 // trunc(mul(sext(a), sext(b))) may become
9384 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9385 // this case can use VMULL). This is performed in the
9386 // MVELaneInterleavingPass.
9387 // - Otherwise we have an option. By default we would expand the
9388 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9389 // registers. One for each vector lane in the vector. This can obviously be
9390 // very expensive.
9391 // - The other option is to use the fact that loads/store can extend/truncate
9392 // to turn a trunc into two truncating stack stores and a stack reload. This
9393 // becomes 3 back-to-back memory operations, but at least that is less than
9394 // all the insert/extracts.
9395 //
9396 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9397 // are either optimized where they can be, or eventually lowered into stack
9398 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9399 // too early, where other instructions would be better, and stops us from
9400 // having to reconstruct multiple buildvector shuffles into loads/stores.
9401 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9402 return SDValue();
9403 EVT FromVT = N->getOperand(0).getValueType();
9404 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9405 return SDValue();
9406
9407 SDValue Lo, Hi;
9408 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9409 SDLoc DL(N);
9410 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9411}
9412
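/// Lower a vector sign/zero extend from v8i16/v16i8 using the two-result MVE
/// extend node described in LowerTruncate above, concatenating the two halves
/// (and extending a second time for the v16i8 -> v16i32 case).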
9413 static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9414 const ARMSubtarget *Subtarget) {
9415 if (!Subtarget->hasMVEIntegerOps())
9416 return SDValue();
9417
9418 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9419
9420 EVT ToVT = N->getValueType(0);
9421 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9422 return SDValue();
9423 SDValue Op = N->getOperand(0);
9424 EVT FromVT = Op.getValueType();
9425 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9426 return SDValue();
9427
9428 SDLoc DL(N);
9429 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9430 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9431 ExtVT = MVT::v8i16;
9432
9433 unsigned Opcode =
9434 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9435 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9436 SDValue Ext1 = Ext.getValue(1);
9437
9438 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9439 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9440 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9441 }
9442
9443 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9444}
9445
9446/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9447/// element has been zero/sign-extended, depending on the isSigned parameter,
9448/// from an integer type half its size.
9449 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9450 bool isSigned) {
9451 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9452 EVT VT = N->getValueType(0);
9453 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9454 SDNode *BVN = N->getOperand(0).getNode();
9455 if (BVN->getValueType(0) != MVT::v4i32 ||
9456 BVN->getOpcode() != ISD::BUILD_VECTOR)
9457 return false;
9458 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9459 unsigned HiElt = 1 - LoElt;
9460 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9461 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9462 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9463 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9464 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9465 return false;
9466 if (isSigned) {
9467 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9468 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9469 return true;
9470 } else {
9471 if (Hi0->isZero() && Hi1->isZero())
9472 return true;
9473 }
9474 return false;
9475 }
9476
9477 if (N->getOpcode() != ISD::BUILD_VECTOR)
9478 return false;
9479
9480 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9481 SDNode *Elt = N->getOperand(i).getNode();
9482 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9483 unsigned EltSize = VT.getScalarSizeInBits();
9484 unsigned HalfSize = EltSize / 2;
9485 if (isSigned) {
9486 if (!isIntN(HalfSize, C->getSExtValue()))
9487 return false;
9488 } else {
9489 if (!isUIntN(HalfSize, C->getZExtValue()))
9490 return false;
9491 }
9492 continue;
9493 }
9494 return false;
9495 }
9496
9497 return true;
9498}
9499
9500/// isSignExtended - Check if a node is a vector value that is sign-extended
9501/// or a constant BUILD_VECTOR with sign-extended elements.
9502 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9503 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9504 return true;
9505 if (isExtendedBUILD_VECTOR(N, DAG, true))
9506 return true;
9507 return false;
9508}
9509
9510/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9511/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9512 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9513 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9514 ISD::isZEXTLoad(N))
9515 return true;
9516 if (isExtendedBUILD_VECTOR(N, DAG, false))
9517 return true;
9518 return false;
9519}
9520
9521static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9522 if (OrigVT.getSizeInBits() >= 64)
9523 return OrigVT;
9524
9525 assert(OrigVT.isSimple() && "Expecting a simple value type");
9526
9527 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9528 switch (OrigSimpleTy) {
9529 default: llvm_unreachable("Unexpected Vector Type");
9530 case MVT::v2i8:
9531 case MVT::v2i16:
9532 return MVT::v2i32;
9533 case MVT::v4i8:
9534 return MVT::v4i16;
9535 }
9536}
9537
9538/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9539/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9540/// We insert the required extension here to get the vector to fill a D register.
9541 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9542 const EVT &OrigTy,
9543 const EVT &ExtTy,
9544 unsigned ExtOpcode) {
9545 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9546 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9547 // 64-bits we need to insert a new extension so that it will be 64-bits.
9548 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9549 if (OrigTy.getSizeInBits() >= 64)
9550 return N;
9551
9552 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9553 EVT NewVT = getExtensionTo64Bits(OrigTy);
9554
9555 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9556}
9557
9558/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9559/// does not do any sign/zero extension. If the original vector is less
9560/// than 64 bits, an appropriate extension will be added after the load to
9561/// reach a total size of 64 bits. We have to add the extension separately
9562/// because ARM does not have a sign/zero extending load for vectors.
9563 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9564 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9565
9566 // The load already has the right type.
9567 if (ExtendedTy == LD->getMemoryVT())
9568 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9569 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9570 LD->getMemOperand()->getFlags());
9571
9572 // We need to create a zextload/sextload. We cannot just create a load
9573 // followed by a sext/zext node because LowerMUL is also run during normal
9574 // operation legalization where we can't create illegal types.
9575 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9576 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9577 LD->getMemoryVT(), LD->getAlign(),
9578 LD->getMemOperand()->getFlags());
9579}
9580
9581/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9582/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9583/// the unextended value. The unextended vector should be 64 bits so that it can
9584/// be used as an operand to a VMULL instruction. If the original vector size
9585 /// before extension is less than 64 bits we add an extension to resize
9586/// the vector to 64 bits.
9587 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9588 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9589 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9590 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9591 N->getOperand(0)->getValueType(0),
9592 N->getValueType(0),
9593 N->getOpcode());
9594
9595 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9596 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9597 "Expected extending load");
9598
9599 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9600 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9601 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9602 SDValue extLoad =
9603 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9604 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9605
9606 return newLoad;
9607 }
9608
9609 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9610 // have been legalized as a BITCAST from v4i32.
9611 if (N->getOpcode() == ISD::BITCAST) {
9612 SDNode *BVN = N->getOperand(0).getNode();
9613 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9614 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9615 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9616 return DAG.getBuildVector(
9617 MVT::v2i32, SDLoc(N),
9618 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9619 }
9620 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9621 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9622 EVT VT = N->getValueType(0);
9623 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9624 unsigned NumElts = VT.getVectorNumElements();
9625 MVT TruncVT = MVT::getIntegerVT(EltSize);
9626 SmallVector<SDValue, 8> Ops;
9627 SDLoc dl(N);
9628 for (unsigned i = 0; i != NumElts; ++i) {
9629 const APInt &CInt = N->getConstantOperandAPInt(i);
9630 // Element types smaller than 32 bits are not legal, so use i32 elements.
9631 // The values are implicitly truncated so sext vs. zext doesn't matter.
9632 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9633 }
9634 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9635}
9636
9637static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9638 unsigned Opcode = N->getOpcode();
9639 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9640 SDNode *N0 = N->getOperand(0).getNode();
9641 SDNode *N1 = N->getOperand(1).getNode();
9642 return N0->hasOneUse() && N1->hasOneUse() &&
9643 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9644 }
9645 return false;
9646}
9647
9648static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9649 unsigned Opcode = N->getOpcode();
9650 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9651 SDNode *N0 = N->getOperand(0).getNode();
9652 SDNode *N1 = N->getOperand(1).getNode();
9653 return N0->hasOneUse() && N1->hasOneUse() &&
9654 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9655 }
9656 return false;
9657}
9658
9659 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9660 // Multiplications are only custom-lowered for 128-bit vectors so that
9661 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9662 EVT VT = Op.getValueType();
9663 assert(VT.is128BitVector() && VT.isInteger() &&
9664 "unexpected type for custom-lowering ISD::MUL");
9665 SDNode *N0 = Op.getOperand(0).getNode();
9666 SDNode *N1 = Op.getOperand(1).getNode();
9667 unsigned NewOpc = 0;
9668 bool isMLA = false;
9669 bool isN0SExt = isSignExtended(N0, DAG);
9670 bool isN1SExt = isSignExtended(N1, DAG);
9671 if (isN0SExt && isN1SExt)
9672 NewOpc = ARMISD::VMULLs;
9673 else {
9674 bool isN0ZExt = isZeroExtended(N0, DAG);
9675 bool isN1ZExt = isZeroExtended(N1, DAG);
9676 if (isN0ZExt && isN1ZExt)
9677 NewOpc = ARMISD::VMULLu;
9678 else if (isN1SExt || isN1ZExt) {
9679 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9680 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9681 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9682 NewOpc = ARMISD::VMULLs;
9683 isMLA = true;
9684 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9685 NewOpc = ARMISD::VMULLu;
9686 isMLA = true;
9687 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9688 std::swap(N0, N1);
9689 NewOpc = ARMISD::VMULLu;
9690 isMLA = true;
9691 }
9692 }
9693
9694 if (!NewOpc) {
9695 if (VT == MVT::v2i64)
9696 // Fall through to expand this. It is not legal.
9697 return SDValue();
9698 else
9699 // Other vector multiplications are legal.
9700 return Op;
9701 }
9702 }
9703
9704 // Legalize to a VMULL instruction.
9705 SDLoc DL(Op);
9706 SDValue Op0;
9707 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9708 if (!isMLA) {
9709 Op0 = SkipExtensionForVMULL(N0, DAG);
9710 assert(Op0.getValueType().is64BitVector() &&
9711 Op1.getValueType().is64BitVector() &&
9712 "unexpected types for extended operands to VMULL");
9713 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9714 }
9715
9716 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9717 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9718 // vmull q0, d4, d6
9719 // vmlal q0, d5, d6
9720 // is faster than
9721 // vaddl q0, d4, d5
9722 // vmovl q1, d6
9723 // vmul q0, q0, q1
9724 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9725 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9726 EVT Op1VT = Op1.getValueType();
9727 return DAG.getNode(N0->getOpcode(), DL, VT,
9728 DAG.getNode(NewOpc, DL, VT,
9729 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9730 DAG.getNode(NewOpc, DL, VT,
9731 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9732}
9733
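/// LowerSDIV_v4i8 - Divide two v4i16 vectors holding sign-extended i8 values
/// by converting both sides to float, multiplying by a VRECPE reciprocal
/// estimate (with a fixed bias instead of Newton steps), and converting back.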
9734 static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9735 SelectionDAG &DAG) {
9736 // TODO: Should this propagate fast-math-flags?
9737
9738 // Convert to float
9739 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9740 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9741 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9742 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9743 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9744 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9745 // Get reciprocal estimate.
9746 // float4 recip = vrecpeq_f32(yf);
9747 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9748 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9749 Y);
9750 // Because char has a smaller range than uchar, we can actually get away
9751 // without any newton steps. This requires that we use a weird bias
9752 // of 0xb000, however (again, this has been exhaustively tested).
9753 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9754 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9755 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9756 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9757 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9758 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9759 // Convert back to short.
9760 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9761 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9762 return X;
9763}
9764
9765 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9766 SelectionDAG &DAG) {
9767 // TODO: Should this propagate fast-math-flags?
9768
9769 SDValue N2;
9770 // Convert to float.
9771 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9772 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9773 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9774 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9775 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9776 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9777
9778 // Use reciprocal estimate and one refinement step.
9779 // float4 recip = vrecpeq_f32(yf);
9780 // recip *= vrecpsq_f32(yf, recip);
9781 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9782 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9783 N1);
9784 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9785 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9786 N1, N2);
9787 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9788 // Because short has a smaller range than ushort, we can actually get away
9789 // with only a single newton step. This requires that we use a weird bias
9790 // of 89, however (again, this has been exhaustively tested).
9791 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9792 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9793 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9794 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9795 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9796 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9797 // Convert back to integer and return.
9798 // return vmovn_s32(vcvt_s32_f32(result));
9799 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9800 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9801 return N0;
9802}
9803
9804 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9805 const ARMSubtarget *ST) {
9806 EVT VT = Op.getValueType();
9807 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9808 "unexpected type for custom-lowering ISD::SDIV");
9809
9810 SDLoc dl(Op);
9811 SDValue N0 = Op.getOperand(0);
9812 SDValue N1 = Op.getOperand(1);
9813 SDValue N2, N3;
9814
9815 if (VT == MVT::v8i8) {
9816 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9817 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9818
9819 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9820 DAG.getIntPtrConstant(4, dl));
9821 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9822 DAG.getIntPtrConstant(4, dl));
9823 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9824 DAG.getIntPtrConstant(0, dl));
9825 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9826 DAG.getIntPtrConstant(0, dl));
9827
9828 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9829 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9830
9831 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9832 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9833
9834 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9835 return N0;
9836 }
9837 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9838}
9839
9840 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9841 const ARMSubtarget *ST) {
9842 // TODO: Should this propagate fast-math-flags?
9843 EVT VT = Op.getValueType();
9844 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9845 "unexpected type for custom-lowering ISD::UDIV");
9846
9847 SDLoc dl(Op);
9848 SDValue N0 = Op.getOperand(0);
9849 SDValue N1 = Op.getOperand(1);
9850 SDValue N2, N3;
9851
9852 if (VT == MVT::v8i8) {
9853 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9854 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9855
9856 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9857 DAG.getIntPtrConstant(4, dl));
9858 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9859 DAG.getIntPtrConstant(4, dl));
9860 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9861 DAG.getIntPtrConstant(0, dl));
9862 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9863 DAG.getIntPtrConstant(0, dl));
9864
9865 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9866 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9867
9868 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9869 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9870
9871 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9872 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9873 MVT::i32),
9874 N0);
9875 return N0;
9876 }
9877
9878 // v4i16 udiv ... Convert to float.
9879 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9880 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9881 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9882 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9883 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9884 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9885
9886 // Use reciprocal estimate and two refinement steps.
9887 // float4 recip = vrecpeq_f32(yf);
9888 // recip *= vrecpsq_f32(yf, recip);
9889 // recip *= vrecpsq_f32(yf, recip);
9890 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9891 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9892 BN1);
9893 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9894 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9895 BN1, N2);
9896 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9897 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9898 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9899 BN1, N2);
9900 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9901 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9902 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9903 // and that it will never cause us to return an answer too large).
9904 // float4 result = as_float4(as_int4(xf*recip) + 2);
9905 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9906 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9907 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9908 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9909 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9910 // Convert back to integer and return.
9911 // return vmovn_u32(vcvt_s32_f32(result));
9912 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9913 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9914 return N0;
9915}
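// Illustrative sketch (assuming the standard arm_neon.h intrinsic names): the
// unsigned v4i16 path above is the same reciprocal trick with two refinement
// steps and a bias of 2, roughly:
//
//   uint16x4_t udiv_v4i16(uint16x4_t x, uint16x4_t y) {
//     float32x4_t xf = vcvtq_f32_u32(vmovl_u16(x));
//     float32x4_t yf = vcvtq_f32_u32(vmovl_u16(y));
//     float32x4_t recip = vrecpeq_f32(yf);
//     recip = vmulq_f32(vrecpsq_f32(yf, recip), recip);  // refinement step 1
//     recip = vmulq_f32(vrecpsq_f32(yf, recip), recip);  // refinement step 2
//     uint32x4_t q = vaddq_u32(vreinterpretq_u32_f32(vmulq_f32(xf, recip)),
//                              vdupq_n_u32(2));          // + 2 ulps
//     return vmovn_u32(vcvtq_u32_f32(vreinterpretq_f32_u32(q)));
//   }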
9916
9917static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9918 SDNode *N = Op.getNode();
9919 EVT VT = N->getValueType(0);
9920 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9921
9922 SDValue Carry = Op.getOperand(2);
9923
9924 SDLoc DL(Op);
9925
9926 SDValue Result;
9927 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9928 // This converts the boolean value carry into the carry flag.
9929 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9930
9931 // Do the addition proper using the carry flag we wanted.
9932 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9933 Op.getOperand(1), Carry);
9934
9935 // Now convert the carry flag into a boolean value.
9936 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9937 } else {
9938 // ARMISD::SUBE expects a carry, not a borrow like ISD::USUBO_CARRY, so we
9939 // have to invert the carry first.
9940 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9941 DAG.getConstant(1, DL, MVT::i32), Carry);
9942 // This converts the boolean value carry into the carry flag.
9943 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9944
9945 // Do the subtraction proper using the carry flag we wanted.
9946 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9947 Op.getOperand(1), Carry);
9948
9949 // Now convert the carry flag into a boolean value.
9950 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9951 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9952 // by ISD::USUBO_CARRY, so compute 1 - C.
9953 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9954 DAG.getConstant(1, DL, MVT::i32), Carry);
9955 }
9956
9957 // Return both values.
9958 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9959}
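// Illustrative sketch (hypothetical helper, not part of this lowering): a
// scalar view of the borrow <-> carry conversion performed above for
// ISD::USUBO_CARRY, assuming ARM's "carry set means no borrow" convention:
//
//   uint32_t usubo_carry(uint32_t a, uint32_t b, uint32_t borrow_in,
//                        uint32_t *borrow_out) {
//     uint32_t c_in = 1 - borrow_in;                  // boolean borrow -> carry
//     uint32_t r = a - b - (1 - c_in);                // what SUBE/SBC computes
//     uint32_t c_out =
//         ((uint64_t)a >= (uint64_t)b + borrow_in);   // carry flag after SBC
//     *borrow_out = 1 - c_out;                        // carry -> boolean borrow
//     return r;
//   }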
9960
9961SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9962 assert(Subtarget->isTargetDarwin());
9963
9964 // For iOS, we want to call an alternative entry point: __sincos_stret;
9965 // the return values are passed via sret.
9966 SDLoc dl(Op);
9967 SDValue Arg = Op.getOperand(0);
9968 EVT ArgVT = Arg.getValueType();
9969 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9970 auto PtrVT = getPointerTy(DAG.getDataLayout());
9971
9973 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9974
9975 // Pair of floats / doubles used to pass the result.
9976 Type *RetTy = StructType::get(ArgTy, ArgTy);
9977 auto &DL = DAG.getDataLayout();
9978
9980 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9981 SDValue SRet;
9982 if (ShouldUseSRet) {
9983 // Create stack object for sret.
9984 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9985 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9986 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9987 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9988
9989 ArgListEntry Entry;
9990 Entry.Node = SRet;
9991 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9992 Entry.IsSExt = false;
9993 Entry.IsZExt = false;
9994 Entry.IsSRet = true;
9995 Args.push_back(Entry);
9997 }
9998
9999 ArgListEntry Entry;
10000 Entry.Node = Arg;
10001 Entry.Ty = ArgTy;
10002 Entry.IsSExt = false;
10003 Entry.IsZExt = false;
10004 Args.push_back(Entry);
10005
10006 RTLIB::Libcall LC =
10007 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
10008 const char *LibcallName = getLibcallName(LC);
10010 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
10011
10013 CLI.setDebugLoc(dl)
10014 .setChain(DAG.getEntryNode())
10015 .setCallee(CC, RetTy, Callee, std::move(Args))
10016 .setDiscardResult(ShouldUseSRet);
10017 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10018
10019 if (!ShouldUseSRet)
10020 return CallResult.first;
10021
10022 SDValue LoadSin =
10023 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
10024
10025 // Address of cos field.
10026 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
10027 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
10028 SDValue LoadCos =
10029 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
10030
10031 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
10032 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
10033 LoadSin.getValue(0), LoadCos.getValue(0));
10034}
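// Illustrative sketch only: when ShouldUseSRet is true (APCS ABI), the call
// built above is conceptually equivalent to the following. SinCosF32 and the
// helper name are placeholders; the real symbol is whatever
// getLibcallName(RTLIB::SINCOS_STRET_F32/F64) returns.
//
//   struct SinCosF32 { float Sin, Cos; };      // RetTy = { ArgTy, ArgTy }
//   void sincos_example(float X, float *S, float *C) {
//     struct SinCosF32 Tmp;                    // the sret stack object
//     __sincos_stret_like(&Tmp, X);            // sret call, result discarded
//     *S = Tmp.Sin;                            // load from SRet
//     *C = Tmp.Cos;                            // load from SRet + store size
//   }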
10035
10036SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
10037 bool Signed,
10038 SDValue &Chain) const {
10039 EVT VT = Op.getValueType();
10040 assert((VT == MVT::i32 || VT == MVT::i64) &&
10041 "unexpected type for custom lowering DIV");
10042 SDLoc dl(Op);
10043
10044 const auto &DL = DAG.getDataLayout();
10045 const auto &TLI = DAG.getTargetLoweringInfo();
10046
10047 const char *Name = nullptr;
10048 if (Signed)
10049 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
10050 else
10051 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
10052
10054
10056
10057 for (auto AI : {1, 0}) {
10058 ArgListEntry Arg;
10059 Arg.Node = Op.getOperand(AI);
10060 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10061 Args.push_back(Arg);
10062 }
10063
10064 CallLoweringInfo CLI(DAG);
10065 CLI.setDebugLoc(dl)
10066 .setChain(Chain)
10068 ES, std::move(Args));
10069
10070 return LowerCallTo(CLI).first;
10071}
10072
10073// This is a code-size optimisation: return the original SDIV node to
10074// DAGCombiner when we don't want to expand SDIV into a sequence of
10075// instructions, and an empty SDValue otherwise, which will cause the
10076// SDIV to be expanded in DAGCombine.
10077SDValue
10078ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10079 SelectionDAG &DAG,
10080 SmallVectorImpl<SDNode *> &Created) const {
10081 // TODO: Support SREM
10082 if (N->getOpcode() != ISD::SDIV)
10083 return SDValue();
10084
10085 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10086 const bool MinSize = ST.hasMinSize();
10087 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10088 : ST.hasDivideInARMMode();
10089
10090 // Don't touch vector types; rewriting this may lead to scalarizing
10091 // the int divs.
10092 if (N->getOperand(0).getValueType().isVector())
10093 return SDValue();
10094
10095 // Bail unless MinSize is set; in both ARM and Thumb modes we also need
10096 // hwdiv support for this to be really profitable.
10097 if (!(MinSize && HasDivide))
10098 return SDValue();
10099
10100 // ARM mode is a bit simpler than Thumb: we can handle large power-of-2
10101 // immediates with 1 mov instruction; no further checks required,
10102 // just return the sdiv node.
10103 if (!ST.isThumb())
10104 return SDValue(N, 0);
10105
10106 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV and
10107 // thus lose the code-size benefit of a MOVS, which requires only 2 bytes.
10108 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10109 // but as this check is doing exactly that, it's not worth the trouble to get TTI.
10110 if (Divisor.sgt(128))
10111 return SDValue();
10112
10113 return SDValue(N, 0);
10114}
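// Illustrative example of the decision above: when compiling for minsize on a
// target with hardware divide, IR such as
//
//   %r = sdiv i32 %x, 8
//
// is kept as a single sdiv instruction instead of being expanded by
// DAGCombiner into the usual shift/add sequence, while in Thumb mode divisors
// above 128 (which need a wide 4-byte immediate MOV) still get expanded.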
10115
10116SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10117 bool Signed) const {
10118 assert(Op.getValueType() == MVT::i32 &&
10119 "unexpected type for custom lowering DIV");
10120 SDLoc dl(Op);
10121
10122 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10123 DAG.getEntryNode(), Op.getOperand(1));
10124
10125 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10126}
10127
10129 SDLoc DL(N);
10130 SDValue Op = N->getOperand(1);
10131 if (N->getValueType(0) == MVT::i32)
10132 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10133 SDValue Lo, Hi;
10134 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10135 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10136 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10137}
10138
10139void ARMTargetLowering::ExpandDIV_Windows(
10140 SDValue Op, SelectionDAG &DAG, bool Signed,
10142 const auto &DL = DAG.getDataLayout();
10143 const auto &TLI = DAG.getTargetLoweringInfo();
10144
10145 assert(Op.getValueType() == MVT::i64 &&
10146 "unexpected type for custom lowering DIV");
10147 SDLoc dl(Op);
10148
10149 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10150
10151 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10152
10153 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10154 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10155 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10156 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10157
10158 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10159}
10160
10161static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10162 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10163 EVT MemVT = LD->getMemoryVT();
10164 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10165 MemVT == MVT::v16i1) &&
10166 "Expected a predicate type!");
10167 assert(MemVT == Op.getValueType());
10168 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10169 "Expected a non-extending load");
10170 assert(LD->isUnindexed() && "Expected an unindexed load");
10171
10172 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16-bit
10173 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10174 // need to make sure that the 8/4/2 bits are actually loaded into the correct
10175 // place, which means loading the value and then shuffling it into the
10176 // bottom bits of the predicate.
10177 // Equally, a VLDR for a v16i1 will actually load 32 bits (so it would be
10178 // incorrect for BE).
10179 // Speaking of BE, the rest of LLVM apparently assumes the reverse order of a
10180 // natural VMSR(load), so the value needs to be reversed.
10181
10182 SDLoc dl(Op);
10183 SDValue Load = DAG.getExtLoad(
10184 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10186 LD->getMemOperand());
10187 SDValue Val = Load;
10188 if (DAG.getDataLayout().isBigEndian())
10189 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10190 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10191 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10192 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10193 if (MemVT != MVT::v16i1)
10194 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10195 DAG.getConstant(0, dl, MVT::i32));
10196 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10197}
10198
10199void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10200 SelectionDAG &DAG) const {
10201 LoadSDNode *LD = cast<LoadSDNode>(N);
10202 EVT MemVT = LD->getMemoryVT();
10203 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10204
10205 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10206 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10207 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10208 SDLoc dl(N);
10210 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10211 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10212 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10213 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10214 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10215 Results.append({Pair, Result.getValue(2)});
10216 }
10217}
10218
10219static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10220 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10221 EVT MemVT = ST->getMemoryVT();
10222 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10223 MemVT == MVT::v16i1) &&
10224 "Expected a predicate type!");
10225 assert(MemVT == ST->getValue().getValueType());
10226 assert(!ST->isTruncatingStore() && "Expected a non-truncating store");
10227 assert(ST->isUnindexed() && "Expected an unindexed store");
10228
10229 // Only store the v2i1/v4i1/v8i1 worth of bits, via a BUILD_VECTOR with the
10230 // top bits unset, followed by a scalar store.
10231 SDLoc dl(Op);
10232 SDValue Build = ST->getValue();
10233 if (MemVT != MVT::v16i1) {
10235 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10236 unsigned Elt = DAG.getDataLayout().isBigEndian()
10237 ? MemVT.getVectorNumElements() - I - 1
10238 : I;
10239 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10240 DAG.getConstant(Elt, dl, MVT::i32)));
10241 }
10242 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10243 Ops.push_back(DAG.getUNDEF(MVT::i32));
10244 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10245 }
10246 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10247 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10248 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10249 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10250 DAG.getConstant(16, dl, MVT::i32));
10251 return DAG.getTruncStore(
10252 ST->getChain(), dl, GRP, ST->getBasePtr(),
10254 ST->getMemOperand());
10255}
10256
10257static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10258 const ARMSubtarget *Subtarget) {
10259 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10260 EVT MemVT = ST->getMemoryVT();
10261 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10262
10263 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10264 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10265 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10266 SDNode *N = Op.getNode();
10267 SDLoc dl(N);
10268
10269 SDValue Lo = DAG.getNode(
10270 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10271 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10272 MVT::i32));
10273 SDValue Hi = DAG.getNode(
10274 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10275 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10276 MVT::i32));
10277
10278 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10279 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10280 MemVT, ST->getMemOperand());
10281 } else if (Subtarget->hasMVEIntegerOps() &&
10282 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10283 MemVT == MVT::v16i1))) {
10284 return LowerPredicateStore(Op, DAG);
10285 }
10286
10287 return SDValue();
10288}
10289
10290static bool isZeroVector(SDValue N) {
10291 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10292 (N->getOpcode() == ARMISD::VMOVIMM &&
10293 isNullConstant(N->getOperand(0))));
10294}
10295
10296static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10297 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10298 MVT VT = Op.getSimpleValueType();
10299 SDValue Mask = N->getMask();
10300 SDValue PassThru = N->getPassThru();
10301 SDLoc dl(Op);
10302
10303 if (isZeroVector(PassThru))
10304 return Op;
10305
10306 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10307 // zero too, and other values are lowered to a select.
10308 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10309 DAG.getTargetConstant(0, dl, MVT::i32));
10310 SDValue NewLoad = DAG.getMaskedLoad(
10311 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10312 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10313 N->getExtensionType(), N->isExpandingLoad());
10314 SDValue Combo = NewLoad;
10315 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10316 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10317 isZeroVector(PassThru->getOperand(0));
10318 if (!PassThru.isUndef() && !PassThruIsCastZero)
10319 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10320 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10321}
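// Illustrative pseudo-IR for the lowering above: a masked load whose passthru
// is neither undef nor zero is rewritten as
//
//   %zload = masked.load(%ptr, %mask, zeroinitializer)  ; MVE-friendly form
//   %res   = select %mask, %zload, %passthru            ; ISD::VSELECT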
10322
10323static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10324 const ARMSubtarget *ST) {
10325 if (!ST->hasMVEIntegerOps())
10326 return SDValue();
10327
10328 SDLoc dl(Op);
10329 unsigned BaseOpcode = 0;
10330 switch (Op->getOpcode()) {
10331 default: llvm_unreachable("Expected VECREDUCE opcode");
10332 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10333 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10334 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10335 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10336 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10337 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10338 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10339 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10340 }
10341
10342 SDValue Op0 = Op->getOperand(0);
10343 EVT VT = Op0.getValueType();
10344 EVT EltVT = VT.getVectorElementType();
10345 unsigned NumElts = VT.getVectorNumElements();
10346 unsigned NumActiveLanes = NumElts;
10347
10348 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10349 NumActiveLanes == 2) &&
10350 "Only expected a power-of-2 vector size");
10351
10352 // Use Op(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10353 // allows us to easily extract vector elements from the lanes.
10354 while (NumActiveLanes > 4) {
10355 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10356 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10357 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10358 NumActiveLanes /= 2;
10359 }
10360
10361 SDValue Res;
10362 if (NumActiveLanes == 4) {
10363 // The remaining 4 elements are combined sequentially.
10364 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10365 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10366 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10367 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10368 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10369 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10370 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10371 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10372 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10373 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10374 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10375 } else {
10376 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10377 DAG.getConstant(0, dl, MVT::i32));
10378 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10379 DAG.getConstant(1, dl, MVT::i32));
10380 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10381 }
10382
10383 // Result type may be wider than element type.
10384 if (EltVT != Op->getValueType(0))
10385 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10386 return Res;
10387}
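// Illustrative trace of the strategy above for a v8i16 VECREDUCE_MUL, with
// lanes written x[0]..x[7]:
//
//   t = mul(x, VREV32(x));               // combines adjacent lane pairs, 8 -> 4
//   r = (t[0] * t[2]) * (t[4] * t[6]);   // remaining 4 lanes combined as scalars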
10388
10389static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10390 const ARMSubtarget *ST) {
10391 if (!ST->hasMVEFloatOps())
10392 return SDValue();
10393 return LowerVecReduce(Op, DAG, ST);
10394}
10395
10396static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10397 const ARMSubtarget *ST) {
10398 if (!ST->hasNEON())
10399 return SDValue();
10400
10401 SDLoc dl(Op);
10402 SDValue Op0 = Op->getOperand(0);
10403 EVT VT = Op0.getValueType();
10404 EVT EltVT = VT.getVectorElementType();
10405
10406 unsigned PairwiseIntrinsic = 0;
10407 switch (Op->getOpcode()) {
10408 default:
10409 llvm_unreachable("Expected VECREDUCE opcode");
10411 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10412 break;
10414 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10415 break;
10417 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10418 break;
10420 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10421 break;
10422 }
10423 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10424
10425 unsigned NumElts = VT.getVectorNumElements();
10426 unsigned NumActiveLanes = NumElts;
10427
10428 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10429 NumActiveLanes == 2) &&
10430 "Only expected a power-of-2 vector size");
10431
10432 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10433 if (VT.is128BitVector()) {
10434 SDValue Lo, Hi;
10435 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10436 VT = Lo.getValueType();
10437 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10438 NumActiveLanes /= 2;
10439 }
10440
10441 // Use pairwise reductions until one lane remains
10442 while (NumActiveLanes > 1) {
10443 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10444 NumActiveLanes /= 2;
10445 }
10446
10447 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10448 DAG.getConstant(0, dl, MVT::i32));
10449
10450 // Result type may be wider than element type.
10451 if (EltVT != Op.getValueType()) {
10452 unsigned Extend = 0;
10453 switch (Op->getOpcode()) {
10454 default:
10455 llvm_unreachable("Expected VECREDUCE opcode");
10458 Extend = ISD::ZERO_EXTEND;
10459 break;
10462 Extend = ISD::SIGN_EXTEND;
10463 break;
10464 }
10465 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10466 }
10467 return Res;
10468}
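// Illustrative sketch (assuming the standard arm_neon.h intrinsic names) of
// the NEON lowering above for a v4i32 VECREDUCE_UMIN:
//
//   uint32x2_t m = vpmin_u32(vget_low_u32(x), vget_high_u32(x)); // 4 -> 2 lanes
//   m = vpmin_u32(m, m);                                         // 2 -> 1 lane
//   uint32_t r = vget_lane_u32(m, 0);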
10469
10470static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10471 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10472 // Acquire/Release load/store is not legal for targets without a dmb or
10473 // equivalent available.
10474 return SDValue();
10475
10476 // Monotonic load/store is legal for all targets.
10477 return Op;
10478}
10479
10480static void ReplaceREADCYCLECOUNTER(SDNode *N,
10481 SmallVectorImpl<SDValue> &Results,
10482 SelectionDAG &DAG,
10483 const ARMSubtarget *Subtarget) {
10484 SDLoc DL(N);
10485 // Under Power Management extensions, the cycle-count is:
10486 // mrc p15, #0, <Rt>, c9, c13, #0
10487 SDValue Ops[] = { N->getOperand(0), // Chain
10488 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10489 DAG.getTargetConstant(15, DL, MVT::i32),
10490 DAG.getTargetConstant(0, DL, MVT::i32),
10491 DAG.getTargetConstant(9, DL, MVT::i32),
10492 DAG.getTargetConstant(13, DL, MVT::i32),
10493 DAG.getTargetConstant(0, DL, MVT::i32)
10494 };
10495
10496 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10497 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10498 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10499 DAG.getConstant(0, DL, MVT::i32)));
10500 Results.push_back(Cycles32.getValue(1));
10501}
10502
10503static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0,
10504 SDValue V1) {
10505 SDLoc dl(V0.getNode());
10506 SDValue RegClass =
10507 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10508 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10509 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10510 const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
10511 return SDValue(
10512 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10513}
10514
10515static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {
10516 SDLoc dl(V.getNode());
10517 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10518 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10519 if (isBigEndian)
10520 std::swap(VLo, VHi);
10521 return createGPRPairNode2xi32(DAG, VLo, VHi);
10522}
10523
10526 SelectionDAG &DAG) {
10527 assert(N->getValueType(0) == MVT::i64 &&
10528 "AtomicCmpSwap on types smaller than 64 bits should be legal");
10529 SDValue Ops[] = {
10530 createGPRPairNode2xi32(DAG, N->getOperand(1),
10531 DAG.getUNDEF(MVT::i32)), // pointer, temp
10532 createGPRPairNodei64(DAG, N->getOperand(2)), // expected
10533 createGPRPairNodei64(DAG, N->getOperand(3)), // new
10534 N->getOperand(0), // chain in
10535 };
10536 SDNode *CmpSwap = DAG.getMachineNode(
10537 ARM::CMP_SWAP_64, SDLoc(N),
10538 DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
10539
10540 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10541 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10542
10543 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10544
10545 SDValue Lo =
10546 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10547 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10548 SDValue Hi =
10549 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10550 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10551 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10552 Results.push_back(SDValue(CmpSwap, 2));
10553}
10554
10555SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10556 SDLoc dl(Op);
10557 EVT VT = Op.getValueType();
10558 SDValue Chain = Op.getOperand(0);
10559 SDValue LHS = Op.getOperand(1);
10560 SDValue RHS = Op.getOperand(2);
10561 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10562 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10563
10564 // If we don't have instructions of this float type then soften to a libcall
10565 // and use SETCC instead.
10566 if (isUnsupportedFloatingType(LHS.getValueType())) {
10568 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10569 if (!RHS.getNode()) {
10570 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10571 CC = ISD::SETNE;
10572 }
10573 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10574 DAG.getCondCode(CC));
10575 return DAG.getMergeValues({Result, Chain}, dl);
10576 }
10577
10578 ARMCC::CondCodes CondCode, CondCode2;
10579 FPCCToARMCC(CC, CondCode, CondCode2);
10580
10581 SDValue True = DAG.getConstant(1, dl, VT);
10582 SDValue False = DAG.getConstant(0, dl, VT);
10583 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10584 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10585 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
10586 if (CondCode2 != ARMCC::AL) {
10587 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10588 Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
10589 }
10590 return DAG.getMergeValues({Result, Chain}, dl);
10591}
10592
10593SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10595
10596 EVT VT = getPointerTy(DAG.getDataLayout());
10597 SDLoc DL(Op);
10598 int FI = MFI.CreateFixedObject(4, 0, false);
10599 return DAG.getFrameIndex(FI, VT);
10600}
10601
10602SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
10603 SelectionDAG &DAG) const {
10604 SDLoc DL(Op);
10605 MakeLibCallOptions CallOptions;
10606 MVT SVT = Op.getOperand(0).getSimpleValueType();
10607 RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16);
10608 SDValue Res =
10609 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
10610 return DAG.getBitcast(MVT::i32, Res);
10611}
10612
10613SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10614 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10615 switch (Op.getOpcode()) {
10616 default: llvm_unreachable("Don't know how to custom lower this!");
10617 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10618 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10619 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10620 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10621 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10622 case ISD::SELECT: return LowerSELECT(Op, DAG);
10623 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10624 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10625 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10626 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10627 case ISD::VASTART: return LowerVASTART(Op, DAG);
10628 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10629 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10630 case ISD::SINT_TO_FP:
10631 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10634 case ISD::FP_TO_SINT:
10635 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10637 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10638 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10639 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10640 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10641 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10642 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10643 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10644 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10645 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10646 Subtarget);
10647 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10648 case ISD::SHL:
10649 case ISD::SRL:
10650 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10651 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10652 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10653 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10654 case ISD::SRL_PARTS:
10655 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10656 case ISD::CTTZ:
10657 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10658 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10659 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10660 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10661 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10662 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10663 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10664 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10665 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10666 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10667 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10668 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10669 case ISD::SIGN_EXTEND:
10670 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10671 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10672 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10673 case ISD::SET_FPMODE:
10674 return LowerSET_FPMODE(Op, DAG);
10675 case ISD::RESET_FPMODE:
10676 return LowerRESET_FPMODE(Op, DAG);
10677 case ISD::MUL: return LowerMUL(Op, DAG);
10678 case ISD::SDIV:
10679 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10680 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10681 return LowerSDIV(Op, DAG, Subtarget);
10682 case ISD::UDIV:
10683 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10684 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10685 return LowerUDIV(Op, DAG, Subtarget);
10686 case ISD::UADDO_CARRY:
10687 case ISD::USUBO_CARRY:
10688 return LowerUADDSUBO_CARRY(Op, DAG);
10689 case ISD::SADDO:
10690 case ISD::SSUBO:
10691 return LowerSignedALUO(Op, DAG);
10692 case ISD::UADDO:
10693 case ISD::USUBO:
10694 return LowerUnsignedALUO(Op, DAG);
10695 case ISD::SADDSAT:
10696 case ISD::SSUBSAT:
10697 case ISD::UADDSAT:
10698 case ISD::USUBSAT:
10699 return LowerADDSUBSAT(Op, DAG, Subtarget);
10700 case ISD::LOAD:
10701 return LowerPredicateLoad(Op, DAG);
10702 case ISD::STORE:
10703 return LowerSTORE(Op, DAG, Subtarget);
10704 case ISD::MLOAD:
10705 return LowerMLOAD(Op, DAG);
10706 case ISD::VECREDUCE_MUL:
10707 case ISD::VECREDUCE_AND:
10708 case ISD::VECREDUCE_OR:
10709 case ISD::VECREDUCE_XOR:
10710 return LowerVecReduce(Op, DAG, Subtarget);
10715 return LowerVecReduceF(Op, DAG, Subtarget);
10720 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10721 case ISD::ATOMIC_LOAD:
10722 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10723 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10724 case ISD::SDIVREM:
10725 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10727 if (Subtarget->isTargetWindows())
10728 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10729 llvm_unreachable("Don't know how to custom lower this!");
10731 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10733 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10734 case ISD::STRICT_FSETCC:
10735 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10736 case ISD::SPONENTRY:
10737 return LowerSPONENTRY(Op, DAG);
10738 case ISD::FP_TO_BF16:
10739 return LowerFP_TO_BF16(Op, DAG);
10740 case ARMISD::WIN__DBZCHK: return SDValue();
10741 }
10742}
10743
10744static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10745 SelectionDAG &DAG) {
10746 unsigned IntNo = N->getConstantOperandVal(0);
10747 unsigned Opc = 0;
10748 if (IntNo == Intrinsic::arm_smlald)
10749 Opc = ARMISD::SMLALD;
10750 else if (IntNo == Intrinsic::arm_smlaldx)
10751 Opc = ARMISD::SMLALDX;
10752 else if (IntNo == Intrinsic::arm_smlsld)
10753 Opc = ARMISD::SMLSLD;
10754 else if (IntNo == Intrinsic::arm_smlsldx)
10755 Opc = ARMISD::SMLSLDX;
10756 else
10757 return;
10758
10759 SDLoc dl(N);
10760 SDValue Lo, Hi;
10761 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10762
10763 SDValue LongMul = DAG.getNode(Opc, dl,
10764 DAG.getVTList(MVT::i32, MVT::i32),
10765 N->getOperand(1), N->getOperand(2),
10766 Lo, Hi);
10767 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10768 LongMul.getValue(0), LongMul.getValue(1)));
10769}
10770
10771/// ReplaceNodeResults - Replace the results of a node with an illegal result
10772/// type with new values built out of custom code.
10773void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10774 SmallVectorImpl<SDValue> &Results,
10775 SelectionDAG &DAG) const {
10776 SDValue Res;
10777 switch (N->getOpcode()) {
10778 default:
10779 llvm_unreachable("Don't know how to custom expand this!");
10780 case ISD::READ_REGISTER:
10782 break;
10783 case ISD::BITCAST:
10784 Res = ExpandBITCAST(N, DAG, Subtarget);
10785 break;
10786 case ISD::SRL:
10787 case ISD::SRA:
10788 case ISD::SHL:
10789 Res = Expand64BitShift(N, DAG, Subtarget);
10790 break;
10791 case ISD::SREM:
10792 case ISD::UREM:
10793 Res = LowerREM(N, DAG);
10794 break;
10795 case ISD::SDIVREM:
10796 case ISD::UDIVREM:
10797 Res = LowerDivRem(SDValue(N, 0), DAG);
10798 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10799 Results.push_back(Res.getValue(0));
10800 Results.push_back(Res.getValue(1));
10801 return;
10802 case ISD::SADDSAT:
10803 case ISD::SSUBSAT:
10804 case ISD::UADDSAT:
10805 case ISD::USUBSAT:
10806 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10807 break;
10809 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10810 return;
10811 case ISD::UDIV:
10812 case ISD::SDIV:
10813 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10814 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10815 Results);
10818 return;
10820 return ReplaceLongIntrinsic(N, Results, DAG);
10821 case ISD::LOAD:
10822 LowerLOAD(N, Results, DAG);
10823 break;
10824 case ISD::TRUNCATE:
10825 Res = LowerTruncate(N, DAG, Subtarget);
10826 break;
10827 case ISD::SIGN_EXTEND:
10828 case ISD::ZERO_EXTEND:
10829 Res = LowerVectorExtend(N, DAG, Subtarget);
10830 break;
10833 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10834 break;
10835 }
10836 if (Res.getNode())
10837 Results.push_back(Res);
10838}
10839
10840//===----------------------------------------------------------------------===//
10841// ARM Scheduler Hooks
10842//===----------------------------------------------------------------------===//
10843
10844/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10845/// registers the function context.
10846void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10848 MachineBasicBlock *DispatchBB,
10849 int FI) const {
10850 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10851 "ROPI/RWPI not currently supported with SjLj");
10852 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10853 DebugLoc dl = MI.getDebugLoc();
10854 MachineFunction *MF = MBB->getParent();
10858 const Function &F = MF->getFunction();
10859
10860 bool isThumb = Subtarget->isThumb();
10861 bool isThumb2 = Subtarget->isThumb2();
10862
10863 unsigned PCLabelId = AFI->createPICLabelUId();
10864 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10866 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10867 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10868
10869 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10870 : &ARM::GPRRegClass;
10871
10872 // Grab constant pool and fixed stack memory operands.
10873 MachineMemOperand *CPMMO =
10876
10877 MachineMemOperand *FIMMOSt =
10880
10881 // Load the address of the dispatch MBB into the jump buffer.
10882 if (isThumb2) {
10883 // Incoming value: jbuf
10884 // ldr.n r5, LCPI1_1
10885 // orr r5, r5, #1
10886 // add r5, pc
10887 // str r5, [$jbuf, #+4] ; &jbuf[1]
10888 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10889 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10891 .addMemOperand(CPMMO)
10893 // Set the low bit because of thumb mode.
10894 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10895 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10896 .addReg(NewVReg1, RegState::Kill)
10897 .addImm(0x01)
10899 .add(condCodeOp());
10900 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10901 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10902 .addReg(NewVReg2, RegState::Kill)
10903 .addImm(PCLabelId);
10904 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10905 .addReg(NewVReg3, RegState::Kill)
10906 .addFrameIndex(FI)
10907 .addImm(36) // &jbuf[1] :: pc
10908 .addMemOperand(FIMMOSt)
10910 } else if (isThumb) {
10911 // Incoming value: jbuf
10912 // ldr.n r1, LCPI1_4
10913 // add r1, pc
10914 // mov r2, #1
10915 // orrs r1, r2
10916 // add r2, $jbuf, #+4 ; &jbuf[1]
10917 // str r1, [r2]
10918 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10919 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10921 .addMemOperand(CPMMO)
10923 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10924 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10925 .addReg(NewVReg1, RegState::Kill)
10926 .addImm(PCLabelId);
10927 // Set the low bit because of thumb mode.
10928 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10929 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10930 .addReg(ARM::CPSR, RegState::Define)
10931 .addImm(1)
10933 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10934 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10935 .addReg(ARM::CPSR, RegState::Define)
10936 .addReg(NewVReg2, RegState::Kill)
10937 .addReg(NewVReg3, RegState::Kill)
10939 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10940 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10941 .addFrameIndex(FI)
10942 .addImm(36); // &jbuf[1] :: pc
10943 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10944 .addReg(NewVReg4, RegState::Kill)
10945 .addReg(NewVReg5, RegState::Kill)
10946 .addImm(0)
10947 .addMemOperand(FIMMOSt)
10949 } else {
10950 // Incoming value: jbuf
10951 // ldr r1, LCPI1_1
10952 // add r1, pc, r1
10953 // str r1, [$jbuf, #+4] ; &jbuf[1]
10954 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10955 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10957 .addImm(0)
10958 .addMemOperand(CPMMO)
10960 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10961 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10962 .addReg(NewVReg1, RegState::Kill)
10963 .addImm(PCLabelId)
10965 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10966 .addReg(NewVReg2, RegState::Kill)
10967 .addFrameIndex(FI)
10968 .addImm(36) // &jbuf[1] :: pc
10969 .addMemOperand(FIMMOSt)
10971 }
10972}
10973
10974void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10975 MachineBasicBlock *MBB) const {
10976 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10977 DebugLoc dl = MI.getDebugLoc();
10978 MachineFunction *MF = MBB->getParent();
10980 MachineFrameInfo &MFI = MF->getFrameInfo();
10981 int FI = MFI.getFunctionContextIndex();
10982
10983 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10984 : &ARM::GPRnopcRegClass;
10985
10986 // Get a mapping of the call site numbers to all of the landing pads they're
10987 // associated with.
10989 unsigned MaxCSNum = 0;
10990 for (MachineBasicBlock &BB : *MF) {
10991 if (!BB.isEHPad())
10992 continue;
10993
10994 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10995 // pad.
10996 for (MachineInstr &II : BB) {
10997 if (!II.isEHLabel())
10998 continue;
10999
11000 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
11001 if (!MF->hasCallSiteLandingPad(Sym)) continue;
11002
11003 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
11004 for (unsigned Idx : CallSiteIdxs) {
11005 CallSiteNumToLPad[Idx].push_back(&BB);
11006 MaxCSNum = std::max(MaxCSNum, Idx);
11007 }
11008 break;
11009 }
11010 }
11011
11012 // Get an ordered list of the machine basic blocks for the jump table.
11013 std::vector<MachineBasicBlock*> LPadList;
11015 LPadList.reserve(CallSiteNumToLPad.size());
11016 for (unsigned I = 1; I <= MaxCSNum; ++I) {
11017 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
11018 for (MachineBasicBlock *MBB : MBBList) {
11019 LPadList.push_back(MBB);
11020 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
11021 }
11022 }
11023
11024 assert(!LPadList.empty() &&
11025 "No landing pad destinations for the dispatch jump table!");
11026
11027 // Create the jump table and associated information.
11029 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
11030 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
11031
11032 // Create the MBBs for the dispatch code.
11033
11034 // Shove the dispatch's address into the return slot in the function context.
11035 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
11036 DispatchBB->setIsEHPad();
11037
11038 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11039 unsigned trap_opcode;
11040 if (Subtarget->isThumb())
11041 trap_opcode = ARM::tTRAP;
11042 else
11043 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
11044
11045 BuildMI(TrapBB, dl, TII->get(trap_opcode));
11046 DispatchBB->addSuccessor(TrapBB);
11047
11048 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11049 DispatchBB->addSuccessor(DispContBB);
11050
11051 // Insert the MBBs.
11052 MF->insert(MF->end(), DispatchBB);
11053 MF->insert(MF->end(), DispContBB);
11054 MF->insert(MF->end(), TrapBB);
11055
11056 // Insert code into the entry block that creates and registers the function
11057 // context.
11058 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
11059
11060 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11063
11065 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11066
11067 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
11068 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11069
11070 // Add a register mask with no preserved registers. This results in all
11071 // registers being marked as clobbered. This can't work if the dispatch block
11072 // is in a Thumb1 function and is linked with ARM code which uses the FP
11073 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11075
11076 bool IsPositionIndependent = isPositionIndependent();
11077 unsigned NumLPads = LPadList.size();
11078 if (Subtarget->isThumb2()) {
11079 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11080 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11081 .addFrameIndex(FI)
11082 .addImm(4)
11083 .addMemOperand(FIMMOLd)
11085
11086 if (NumLPads < 256) {
11087 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11088 .addReg(NewVReg1)
11089 .addImm(LPadList.size())
11091 } else {
11092 Register VReg1 = MRI->createVirtualRegister(TRC);
11093 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11094 .addImm(NumLPads & 0xFFFF)
11096
11097 unsigned VReg2 = VReg1;
11098 if ((NumLPads & 0xFFFF0000) != 0) {
11099 VReg2 = MRI->createVirtualRegister(TRC);
11100 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11101 .addReg(VReg1)
11102 .addImm(NumLPads >> 16)
11104 }
11105
11106 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11107 .addReg(NewVReg1)
11108 .addReg(VReg2)
11110 }
11111
11112 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11113 .addMBB(TrapBB)
11115 .addReg(ARM::CPSR);
11116
11117 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11118 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11119 .addJumpTableIndex(MJTI)
11121
11122 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11123 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11124 .addReg(NewVReg3, RegState::Kill)
11125 .addReg(NewVReg1)
11128 .add(condCodeOp());
11129
11130 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11131 .addReg(NewVReg4, RegState::Kill)
11132 .addReg(NewVReg1)
11133 .addJumpTableIndex(MJTI);
11134 } else if (Subtarget->isThumb()) {
11135 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11136 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11137 .addFrameIndex(FI)
11138 .addImm(1)
11139 .addMemOperand(FIMMOLd)
11141
11142 if (NumLPads < 256) {
11143 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11144 .addReg(NewVReg1)
11145 .addImm(NumLPads)
11147 } else {
11148 MachineConstantPool *ConstantPool = MF->getConstantPool();
11149 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11150 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11151
11152 // MachineConstantPool wants an explicit alignment.
11153 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11154 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11155
11156 Register VReg1 = MRI->createVirtualRegister(TRC);
11157 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11158 .addReg(VReg1, RegState::Define)
11161 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11162 .addReg(NewVReg1)
11163 .addReg(VReg1)
11165 }
11166
11167 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11168 .addMBB(TrapBB)
11170 .addReg(ARM::CPSR);
11171
11172 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11173 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11174 .addReg(ARM::CPSR, RegState::Define)
11175 .addReg(NewVReg1)
11176 .addImm(2)
11178
11179 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11180 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11181 .addJumpTableIndex(MJTI)
11183
11184 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11185 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11186 .addReg(ARM::CPSR, RegState::Define)
11187 .addReg(NewVReg2, RegState::Kill)
11188 .addReg(NewVReg3)
11190
11191 MachineMemOperand *JTMMOLd =
11192 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11194
11195 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11196 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11197 .addReg(NewVReg4, RegState::Kill)
11198 .addImm(0)
11199 .addMemOperand(JTMMOLd)
11201
11202 unsigned NewVReg6 = NewVReg5;
11203 if (IsPositionIndependent) {
11204 NewVReg6 = MRI->createVirtualRegister(TRC);
11205 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11206 .addReg(ARM::CPSR, RegState::Define)
11207 .addReg(NewVReg5, RegState::Kill)
11208 .addReg(NewVReg3)
11210 }
11211
11212 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11213 .addReg(NewVReg6, RegState::Kill)
11214 .addJumpTableIndex(MJTI);
11215 } else {
11216 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11217 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11218 .addFrameIndex(FI)
11219 .addImm(4)
11220 .addMemOperand(FIMMOLd)
11222
11223 if (NumLPads < 256) {
11224 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11225 .addReg(NewVReg1)
11226 .addImm(NumLPads)
11228 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11229 Register VReg1 = MRI->createVirtualRegister(TRC);
11230 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11231 .addImm(NumLPads & 0xFFFF)
11233
11234 unsigned VReg2 = VReg1;
11235 if ((NumLPads & 0xFFFF0000) != 0) {
11236 VReg2 = MRI->createVirtualRegister(TRC);
11237 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11238 .addReg(VReg1)
11239 .addImm(NumLPads >> 16)
11241 }
11242
11243 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11244 .addReg(NewVReg1)
11245 .addReg(VReg2)
11247 } else {
11248 MachineConstantPool *ConstantPool = MF->getConstantPool();
11249 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11250 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11251
11252 // MachineConstantPool wants an explicit alignment.
11253 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11254 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11255
11256 Register VReg1 = MRI->createVirtualRegister(TRC);
11257 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11258 .addReg(VReg1, RegState::Define)
11260 .addImm(0)
11262 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11263 .addReg(NewVReg1)
11264 .addReg(VReg1, RegState::Kill)
11266 }
11267
11268 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11269 .addMBB(TrapBB)
11271 .addReg(ARM::CPSR);
11272
11273 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11274 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11275 .addReg(NewVReg1)
11278 .add(condCodeOp());
11279 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11280 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11281 .addJumpTableIndex(MJTI)
11283
11284 MachineMemOperand *JTMMOLd =
11285 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11287 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11288 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11289 .addReg(NewVReg3, RegState::Kill)
11290 .addReg(NewVReg4)
11291 .addImm(0)
11292 .addMemOperand(JTMMOLd)
11294
11295 if (IsPositionIndependent) {
11296 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11297 .addReg(NewVReg5, RegState::Kill)
11298 .addReg(NewVReg4)
11299 .addJumpTableIndex(MJTI);
11300 } else {
11301 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11302 .addReg(NewVReg5, RegState::Kill)
11303 .addJumpTableIndex(MJTI);
11304 }
11305 }
11306
11307 // Add the jump table entries as successors to the MBB.
11309 for (MachineBasicBlock *CurMBB : LPadList) {
11310 if (SeenMBBs.insert(CurMBB).second)
11311 DispContBB->addSuccessor(CurMBB);
11312 }
11313
11314 // N.B. the order the invoke BBs are processed in doesn't matter here.
11315 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11317 for (MachineBasicBlock *BB : InvokeBBs) {
11318
11319 // Remove the landing pad successor from the invoke block and replace it
11320 // with the new dispatch block.
11321 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11322 while (!Successors.empty()) {
11323 MachineBasicBlock *SMBB = Successors.pop_back_val();
11324 if (SMBB->isEHPad()) {
11325 BB->removeSuccessor(SMBB);
11326 MBBLPads.push_back(SMBB);
11327 }
11328 }
11329
11330 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11331 BB->normalizeSuccProbs();
11332
11333 // Find the invoke call and mark all of the callee-saved registers as
11334 // 'implicit defined' so that they're spilled. This prevents code from
11335 // moving instructions to before the EH block, where they will never be
11336 // executed.
11338 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11339 if (!II->isCall()) continue;
11340
11343 OI = II->operands_begin(), OE = II->operands_end();
11344 OI != OE; ++OI) {
11345 if (!OI->isReg()) continue;
11346 DefRegs[OI->getReg()] = true;
11347 }
11348
11349 MachineInstrBuilder MIB(*MF, &*II);
11350
11351 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11352 unsigned Reg = SavedRegs[i];
11353 if (Subtarget->isThumb2() &&
11354 !ARM::tGPRRegClass.contains(Reg) &&
11355 !ARM::hGPRRegClass.contains(Reg))
11356 continue;
11357 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11358 continue;
11359 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11360 continue;
11361 if (!DefRegs[Reg])
11363 }
11364
11365 break;
11366 }
11367 }
11368
11369 // Mark all former landing pads as non-landing pads. The dispatch is the only
11370 // landing pad now.
11371 for (MachineBasicBlock *MBBLPad : MBBLPads)
11372 MBBLPad->setIsEHPad(false);
11373
11374 // The instruction is gone now.
11375 MI.eraseFromParent();
11376}
11377
11378 static
11379 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11380 for (MachineBasicBlock *S : MBB->successors())
11381 if (S != Succ)
11382 return S;
11383 llvm_unreachable("Expecting a BB with two successors!");
11384}
11385
11386/// Return the load opcode for a given load size. If the load size is >= 8,
11387/// a NEON opcode will be returned.
11388static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11389 if (LdSize >= 8)
11390 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11391 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11392 if (IsThumb1)
11393 return LdSize == 4 ? ARM::tLDRi
11394 : LdSize == 2 ? ARM::tLDRHi
11395 : LdSize == 1 ? ARM::tLDRBi : 0;
11396 if (IsThumb2)
11397 return LdSize == 4 ? ARM::t2LDR_POST
11398 : LdSize == 2 ? ARM::t2LDRH_POST
11399 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11400 return LdSize == 4 ? ARM::LDR_POST_IMM
11401 : LdSize == 2 ? ARM::LDRH_POST
11402 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11403}
11404
11405/// Return the store opcode for a given store size. If the store size is >= 8,
11406/// a NEON opcode will be returned.
11407static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11408 if (StSize >= 8)
11409 return StSize == 16 ? ARM::VST1q32wb_fixed
11410 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11411 if (IsThumb1)
11412 return StSize == 4 ? ARM::tSTRi
11413 : StSize == 2 ? ARM::tSTRHi
11414 : StSize == 1 ? ARM::tSTRBi : 0;
11415 if (IsThumb2)
11416 return StSize == 4 ? ARM::t2STR_POST
11417 : StSize == 2 ? ARM::t2STRH_POST
11418 : StSize == 1 ? ARM::t2STRB_POST : 0;
11419 return StSize == 4 ? ARM::STR_POST_IMM
11420 : StSize == 2 ? ARM::STRH_POST
11421 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11422}
11423
11424/// Emit a post-increment load operation with a given size. The instructions
11425/// will be added to BB at Pos.
11426static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11427 const TargetInstrInfo *TII, const DebugLoc &dl,
11428 unsigned LdSize, unsigned Data, unsigned AddrIn,
11429 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11430 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11431 assert(LdOpc != 0 && "Should have a load opcode");
11432 if (LdSize >= 8) {
11433 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11434 .addReg(AddrOut, RegState::Define)
11435 .addReg(AddrIn)
11436 .addImm(0)
11438 } else if (IsThumb1) {
11439 // load + update AddrIn
11440 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11441 .addReg(AddrIn)
11442 .addImm(0)
11444 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11445 .add(t1CondCodeOp())
11446 .addReg(AddrIn)
11447 .addImm(LdSize)
11449 } else if (IsThumb2) {
11450 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11451 .addReg(AddrOut, RegState::Define)
11452 .addReg(AddrIn)
11453 .addImm(LdSize)
11455 } else { // arm
11456 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11457 .addReg(AddrOut, RegState::Define)
11458 .addReg(AddrIn)
11459 .addReg(0)
11460 .addImm(LdSize)
11462 }
11463}
11464
11465/// Emit a post-increment store operation with given size. The instructions
11466/// will be added to BB at Pos.
11467static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11468 const TargetInstrInfo *TII, const DebugLoc &dl,
11469 unsigned StSize, unsigned Data, unsigned AddrIn,
11470 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11471 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11472 assert(StOpc != 0 && "Should have a store opcode");
11473 if (StSize >= 8) {
11474 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11475 .addReg(AddrIn)
11476 .addImm(0)
11477 .addReg(Data)
11479 } else if (IsThumb1) {
11480 // store + update AddrIn
11481 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11482 .addReg(Data)
11483 .addReg(AddrIn)
11484 .addImm(0)
11486 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11487 .add(t1CondCodeOp())
11488 .addReg(AddrIn)
11489 .addImm(StSize)
11491 } else if (IsThumb2) {
11492 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11493 .addReg(Data)
11494 .addReg(AddrIn)
11495 .addImm(StSize)
11497 } else { // arm
11498 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11499 .addReg(Data)
11500 .addReg(AddrIn)
11501 .addReg(0)
11502 .addImm(StSize)
11504 }
11505}
11506
11507MachineBasicBlock *
11508ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11509 MachineBasicBlock *BB) const {
11510 // This pseudo instruction has 4 operands: dst, src, size, alignment.
11511 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11512 // Otherwise, we will generate unrolled scalar copies.
11513 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11514 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11515 MachineFunction::iterator It = ++BB->getIterator();
11516
11517 Register dest = MI.getOperand(0).getReg();
11518 Register src = MI.getOperand(1).getReg();
11519 unsigned SizeVal = MI.getOperand(2).getImm();
11520 unsigned Alignment = MI.getOperand(3).getImm();
11521 DebugLoc dl = MI.getDebugLoc();
11522
11523 MachineFunction *MF = BB->getParent();
11524 MachineRegisterInfo &MRI = MF->getRegInfo();
11525 unsigned UnitSize = 0;
11526 const TargetRegisterClass *TRC = nullptr;
11527 const TargetRegisterClass *VecTRC = nullptr;
11528
11529 bool IsThumb1 = Subtarget->isThumb1Only();
11530 bool IsThumb2 = Subtarget->isThumb2();
11531 bool IsThumb = Subtarget->isThumb();
11532
11533 if (Alignment & 1) {
11534 UnitSize = 1;
11535 } else if (Alignment & 2) {
11536 UnitSize = 2;
11537 } else {
11538 // Check whether we can use NEON instructions.
11539 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11540 Subtarget->hasNEON()) {
11541 if ((Alignment % 16 == 0) && SizeVal >= 16)
11542 UnitSize = 16;
11543 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11544 UnitSize = 8;
11545 }
11546 // Can't use NEON instructions.
11547 if (UnitSize == 0)
11548 UnitSize = 4;
11549 }
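// For example: a 1-byte-aligned copy uses UnitSize 1, a 4-byte-aligned copy
// without usable NEON uses UnitSize 4, and a 16-byte-aligned copy of at
// least 16 bytes with NEON usable uses UnitSize 16.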
11550
11551 // Select the correct opcode and register class for unit size load/store
11552 bool IsNeon = UnitSize >= 8;
11553 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11554 if (IsNeon)
11555 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11556 : UnitSize == 8 ? &ARM::DPRRegClass
11557 : nullptr;
11558
11559 unsigned BytesLeft = SizeVal % UnitSize;
11560 unsigned LoopSize = SizeVal - BytesLeft;
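// Worked example: SizeVal == 35 with UnitSize == 16 gives LoopSize == 32 and
// BytesLeft == 3, i.e. two 16-byte copies followed by three byte copies.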
11561
11562 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11563 // Use LDR and STR to copy.
11564 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11565 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11566 unsigned srcIn = src;
11567 unsigned destIn = dest;
11568 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11569 Register srcOut = MRI.createVirtualRegister(TRC);
11570 Register destOut = MRI.createVirtualRegister(TRC);
11571 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11572 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11573 IsThumb1, IsThumb2);
11574 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11575 IsThumb1, IsThumb2);
11576 srcIn = srcOut;
11577 destIn = destOut;
11578 }
11579
11580 // Handle the leftover bytes with LDRB and STRB.
11581 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11582 // [destOut] = STRB_POST(scratch, destIn, 1)
11583 for (unsigned i = 0; i < BytesLeft; i++) {
11584 Register srcOut = MRI.createVirtualRegister(TRC);
11585 Register destOut = MRI.createVirtualRegister(TRC);
11586 Register scratch = MRI.createVirtualRegister(TRC);
11587 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11588 IsThumb1, IsThumb2);
11589 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11590 IsThumb1, IsThumb2);
11591 srcIn = srcOut;
11592 destIn = destOut;
11593 }
11594 MI.eraseFromParent(); // The instruction is gone now.
11595 return BB;
11596 }
11597
11598 // Expand the pseudo op to a loop.
11599 // thisMBB:
11600 // ...
11601 // movw varEnd, # --> with thumb2
11602 // movt varEnd, #
11603 // ldrcp varEnd, idx --> without thumb2
11604 // fallthrough --> loopMBB
11605 // loopMBB:
11606 // PHI varPhi, varEnd, varLoop
11607 // PHI srcPhi, src, srcLoop
11608 // PHI destPhi, dst, destLoop
11609 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11610 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11611 // subs varLoop, varPhi, #UnitSize
11612 // bne loopMBB
11613 // fallthrough --> exitMBB
11614 // exitMBB:
11615 // epilogue to handle left-over bytes
11616 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11617 // [destOut] = STRB_POST(scratch, destLoop, 1)
11618 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11619 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11620 MF->insert(It, loopMBB);
11621 MF->insert(It, exitMBB);
11622
11623 // Set the call frame size on entry to the new basic blocks.
11624 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11625 loopMBB->setCallFrameSize(CallFrameSize);
11626 exitMBB->setCallFrameSize(CallFrameSize);
11627
11628 // Transfer the remainder of BB and its successor edges to exitMBB.
11629 exitMBB->splice(exitMBB->begin(), BB,
11630 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11632
11633 // Load an immediate to varEnd.
11634 Register varEnd = MRI.createVirtualRegister(TRC);
11635 if (Subtarget->useMovt()) {
11636 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11637 varEnd)
11638 .addImm(LoopSize);
11639 } else if (Subtarget->genExecuteOnly()) {
11640 assert(IsThumb && "Non-thumb expected to have used movt");
11641 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11642 } else {
11643 MachineConstantPool *ConstantPool = MF->getConstantPool();
11644 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11645 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11646
11647 // MachineConstantPool wants an explicit alignment.
11648 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11649 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11650 MachineMemOperand *CPMMO =
11653
11654 if (IsThumb)
11655 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11656 .addReg(varEnd, RegState::Define)
11659 .addMemOperand(CPMMO);
11660 else
11661 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11662 .addReg(varEnd, RegState::Define)
11664 .addImm(0)
11666 .addMemOperand(CPMMO);
11667 }
11668 BB->addSuccessor(loopMBB);
11669
11670 // Generate the loop body:
11671 // varPhi = PHI(varLoop, varEnd)
11672 // srcPhi = PHI(srcLoop, src)
11673 // destPhi = PHI(destLoop, dst)
11674 MachineBasicBlock *entryBB = BB;
11675 BB = loopMBB;
11676 Register varLoop = MRI.createVirtualRegister(TRC);
11677 Register varPhi = MRI.createVirtualRegister(TRC);
11678 Register srcLoop = MRI.createVirtualRegister(TRC);
11679 Register srcPhi = MRI.createVirtualRegister(TRC);
11680 Register destLoop = MRI.createVirtualRegister(TRC);
11681 Register destPhi = MRI.createVirtualRegister(TRC);
11682
11683 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11684 .addReg(varLoop).addMBB(loopMBB)
11685 .addReg(varEnd).addMBB(entryBB);
11686 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11687 .addReg(srcLoop).addMBB(loopMBB)
11688 .addReg(src).addMBB(entryBB);
11689 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11690 .addReg(destLoop).addMBB(loopMBB)
11691 .addReg(dest).addMBB(entryBB);
11692
11693 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11694 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11695 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11696 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11697 IsThumb1, IsThumb2);
11698 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11699 IsThumb1, IsThumb2);
11700
11701 // Decrement loop variable by UnitSize.
11702 if (IsThumb1) {
11703 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11704 .add(t1CondCodeOp())
11705 .addReg(varPhi)
11706 .addImm(UnitSize)
11708 } else {
11709 MachineInstrBuilder MIB =
11710 BuildMI(*BB, BB->end(), dl,
11711 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11712 MIB.addReg(varPhi)
11713 .addImm(UnitSize)
11715 .add(condCodeOp());
11716 MIB->getOperand(5).setReg(ARM::CPSR);
11717 MIB->getOperand(5).setIsDef(true);
11718 }
11719 BuildMI(*BB, BB->end(), dl,
11720 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11721 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11722
11723 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11724 BB->addSuccessor(loopMBB);
11725 BB->addSuccessor(exitMBB);
11726
11727 // Add epilogue to handle BytesLeft.
11728 BB = exitMBB;
11729 auto StartOfExit = exitMBB->begin();
11730
11731 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11732 // [destOut] = STRB_POST(scratch, destLoop, 1)
11733 unsigned srcIn = srcLoop;
11734 unsigned destIn = destLoop;
11735 for (unsigned i = 0; i < BytesLeft; i++) {
11736 Register srcOut = MRI.createVirtualRegister(TRC);
11737 Register destOut = MRI.createVirtualRegister(TRC);
11738 Register scratch = MRI.createVirtualRegister(TRC);
11739 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11740 IsThumb1, IsThumb2);
11741 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11742 IsThumb1, IsThumb2);
11743 srcIn = srcOut;
11744 destIn = destOut;
11745 }
11746
11747 MI.eraseFromParent(); // The instruction is gone now.
11748 return BB;
11749}
11750
11751MachineBasicBlock *
11752ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11753 MachineBasicBlock *MBB) const {
11755 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11756 DebugLoc DL = MI.getDebugLoc();
11757
11758 assert(Subtarget->isTargetWindows() &&
11759 "__chkstk is only supported on Windows");
11760 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11761
11762 // __chkstk takes the number of words to allocate on the stack in R4, and
11763 // returns the stack adjustment in bytes in R4. This will not
11764 // clobber any other registers (other than the obvious lr).
11765 //
11766 // Although, technically, IP should be considered a register which may be
11767 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11768 // thumb-2 environment, so there is no interworking required. As a result, we
11769 // do not expect a veneer to be emitted by the linker, clobbering IP.
11770 //
11771 // Each module receives its own copy of __chkstk, so no import thunk is
11772 // required, again, ensuring that IP is not clobbered.
11773 //
11774 // Finally, although some linkers may theoretically provide a trampoline for
11775 // out of range calls (which is quite common due to a 32M range limitation of
11776 // branches for Thumb), we can generate the long-call version via
11777 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11778 // IP.
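// Roughly, the sequence emitted below is (for the small/medium/kernel code
// models; the large model first materializes the address of __chkstk with
// t2MOVi32imm):
//   bl __chkstk        ; words to allocate in r4, byte adjustment back in r4
//   sub.w sp, sp, r4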
11779
11780 switch (TM.getCodeModel()) {
11781 case CodeModel::Tiny:
11782 llvm_unreachable("Tiny code model not available on ARM.");
11783 case CodeModel::Small:
11784 case CodeModel::Medium:
11785 case CodeModel::Kernel:
11786 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11788 .addExternalSymbol("__chkstk")
11791 .addReg(ARM::R12,
11793 .addReg(ARM::CPSR,
11795 break;
11796 case CodeModel::Large: {
11798 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11799
11800 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11801 .addExternalSymbol("__chkstk");
11804 .addReg(Reg, RegState::Kill)
11807 .addReg(ARM::R12,
11809 .addReg(ARM::CPSR,
11811 break;
11812 }
11813 }
11814
11815 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11816 .addReg(ARM::SP, RegState::Kill)
11817 .addReg(ARM::R4, RegState::Kill)
11820 .add(condCodeOp());
11821
11822 MI.eraseFromParent();
11823 return MBB;
11824}
11825
11826MachineBasicBlock *
11827ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11828 MachineBasicBlock *MBB) const {
11829 DebugLoc DL = MI.getDebugLoc();
11830 MachineFunction *MF = MBB->getParent();
11831 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11832
11833 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11834 MF->insert(++MBB->getIterator(), ContBB);
11835 ContBB->splice(ContBB->begin(), MBB,
11836 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11838 MBB->addSuccessor(ContBB);
11839
11840 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11841 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11842 MF->push_back(TrapBB);
11843 MBB->addSuccessor(TrapBB);
11844
11845 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11846 .addReg(MI.getOperand(0).getReg())
11847 .addImm(0)
11849 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11850 .addMBB(TrapBB)
11852 .addReg(ARM::CPSR);
11853
11854 MI.eraseFromParent();
11855 return ContBB;
11856}
11857
11858// The CPSR operand of SelectItr might be missing a kill marker
11859// because there were multiple uses of CPSR, and ISel didn't know
11860// which to mark. Figure out whether SelectItr should have had a
11861// kill marker, and set it if it should. Returns the correct kill
11862// marker value.
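// For example, if a later instruction in the block still reads CPSR, or CPSR
// is live into a successor, no kill flag may be added; only when the scan
// below finds a redefinition of CPSR, or reaches the end of the block with
// CPSR dead, is the kill flag attached to SelectItr.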
11863static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11864 MachineBasicBlock* BB,
11865 const TargetRegisterInfo* TRI) {
11866 // Scan forward through BB for a use/def of CPSR.
11867 MachineBasicBlock::iterator miI(std::next(SelectItr));
11868 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11869 const MachineInstr& mi = *miI;
11870 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11871 return false;
11872 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11873 break; // Should have kill-flag - update below.
11874 }
11875
11876 // If we hit the end of the block, check whether CPSR is live into a
11877 // successor.
11878 if (miI == BB->end()) {
11879 for (MachineBasicBlock *Succ : BB->successors())
11880 if (Succ->isLiveIn(ARM::CPSR))
11881 return false;
11882 }
11883
11884 // We found a def, or hit the end of the basic block and CPSR wasn't live
11885 // out. SelectMI should have a kill flag on CPSR.
11886 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11887 return true;
11888}
11889
11890/// Adds logic in the loop entry MBB to calculate the loop iteration count and
11891/// adds t2WhileLoopSetup and t2WhileLoopStart to generate a WLS loop.
11892static Register genTPEntry(MachineBasicBlock *TpEntry,
11893 MachineBasicBlock *TpLoopBody,
11894 MachineBasicBlock *TpExit, Register OpSizeReg,
11895 const TargetInstrInfo *TII, DebugLoc Dl,
11896 MachineRegisterInfo &MRI) {
11897 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
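// E.g. an element count of 100 gives (100 + 15) >> 4 == 7 iterations, with
// the final iteration predicated (via VCTP) to cover the remaining 4 elements.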
11898 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11899 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11900 .addUse(OpSizeReg)
11901 .addImm(15)
11903 .addReg(0);
11904
11905 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11906 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11907 .addUse(AddDestReg, RegState::Kill)
11908 .addImm(4)
11910 .addReg(0);
11911
11912 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11913 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11914 .addUse(LsrDestReg, RegState::Kill);
11915
11916 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11917 .addUse(TotalIterationsReg)
11918 .addMBB(TpExit);
11919
11920 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11921 .addMBB(TpLoopBody)
11923
11924 return TotalIterationsReg;
11925}
11926
11927/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2LoopDec and
11928/// t2LoopEnd. These are used by later passes to generate tail-predicated
11929/// loops.
11930static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11931 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11932 const TargetInstrInfo *TII, DebugLoc Dl,
11933 MachineRegisterInfo &MRI, Register OpSrcReg,
11934 Register OpDestReg, Register ElementCountReg,
11935 Register TotalIterationsReg, bool IsMemcpy) {
11936 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11937 // array, loop iteration counter, predication counter.
11938
11939 Register SrcPhiReg, CurrSrcReg;
11940 if (IsMemcpy) {
11941 // Current position in the src array
11942 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11943 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11944 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11945 .addUse(OpSrcReg)
11946 .addMBB(TpEntry)
11947 .addUse(CurrSrcReg)
11948 .addMBB(TpLoopBody);
11949 }
11950
11951 // Current position in the dest array
11952 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11953 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11954 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11955 .addUse(OpDestReg)
11956 .addMBB(TpEntry)
11957 .addUse(CurrDestReg)
11958 .addMBB(TpLoopBody);
11959
11960 // Current loop counter
11961 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11962 Register RemainingLoopIterationsReg =
11963 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11964 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11965 .addUse(TotalIterationsReg)
11966 .addMBB(TpEntry)
11967 .addUse(RemainingLoopIterationsReg)
11968 .addMBB(TpLoopBody);
11969
11970 // Predication counter
11971 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11972 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11973 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11974 .addUse(ElementCountReg)
11975 .addMBB(TpEntry)
11976 .addUse(RemainingElementsReg)
11977 .addMBB(TpLoopBody);
11978
11979 // Pass predication counter to VCTP
11980 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11981 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11982 .addUse(PredCounterPhiReg)
11984 .addReg(0)
11985 .addReg(0);
11986
11987 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11988 .addUse(PredCounterPhiReg)
11989 .addImm(16)
11991 .addReg(0);
11992
11993 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11994 Register SrcValueReg;
11995 if (IsMemcpy) {
11996 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11997 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11998 .addDef(CurrSrcReg)
11999 .addDef(SrcValueReg)
12000 .addReg(SrcPhiReg)
12001 .addImm(16)
12003 .addUse(VccrReg)
12004 .addReg(0);
12005 } else
12006 SrcValueReg = OpSrcReg;
12007
12008 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
12009 .addDef(CurrDestReg)
12010 .addUse(SrcValueReg)
12011 .addReg(DestPhiReg)
12012 .addImm(16)
12014 .addUse(VccrReg)
12015 .addReg(0);
12016
12017 // Add the pseudo instructions for decrementing the loop counter and
12018 // marking the end: t2LoopDec and t2LoopEnd.
12019 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
12020 .addUse(LoopCounterPhiReg)
12021 .addImm(1);
12022
12023 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
12024 .addUse(RemainingLoopIterationsReg)
12025 .addMBB(TpLoopBody);
12026
12027 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
12028 .addMBB(TpExit)
12030}
12031
12032MachineBasicBlock *
12033ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12034 MachineBasicBlock *BB) const {
12035 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
12036 DebugLoc dl = MI.getDebugLoc();
12037 bool isThumb2 = Subtarget->isThumb2();
12038 switch (MI.getOpcode()) {
12039 default: {
12040 MI.print(errs());
12041 llvm_unreachable("Unexpected instr type to insert");
12042 }
12043
12044 // Thumb1 post-indexed loads are really just single-register LDMs.
12045 case ARM::tLDR_postidx: {
12046 MachineOperand Def(MI.getOperand(1));
12047 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12048 .add(Def) // Rn_wb
12049 .add(MI.getOperand(2)) // Rn
12050 .add(MI.getOperand(3)) // PredImm
12051 .add(MI.getOperand(4)) // PredReg
12052 .add(MI.getOperand(0)) // Rt
12053 .cloneMemRefs(MI);
12054 MI.eraseFromParent();
12055 return BB;
12056 }
12057
12058 case ARM::MVE_MEMCPYLOOPINST:
12059 case ARM::MVE_MEMSETLOOPINST: {
12060
12061 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
12062 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
12063 // the iteration count (= ceil(size_in_bytes / 16)) in the TP entry block and
12064 // adds the relevant instructions in the TP loop Body for generation of a
12065 // WLSTP loop.
12066
12067 // Below is relevant portion of the CFG after the transformation.
12068 // The Machine Basic Blocks are shown along with branch conditions (in
12069 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
12070 // portion of the CFG and may not necessarily be the entry/exit of the
12071 // function.
12072
12073 // (Relevant) CFG after transformation:
12074 // TP entry MBB
12075 // |
12076 // |-----------------|
12077 // (n <= 0) (n > 0)
12078 // | |
12079 // | TP loop Body MBB<--|
12080 // | | |
12081 // \ |___________|
12082 // \ /
12083 // TP exit MBB
12084
12085 MachineFunction *MF = BB->getParent();
12086 MachineFunctionProperties &Properties = MF->getProperties();
12087 MachineRegisterInfo &MRI = MF->getRegInfo();
12088
12089 Register OpDestReg = MI.getOperand(0).getReg();
12090 Register OpSrcReg = MI.getOperand(1).getReg();
12091 Register OpSizeReg = MI.getOperand(2).getReg();
12092
12093 // Allocate the required MBBs and add to parent function.
12094 MachineBasicBlock *TpEntry = BB;
12095 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12096 MachineBasicBlock *TpExit;
12097
12098 MF->push_back(TpLoopBody);
12099
12100 // If any instructions are present in the current block after
12101 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12102 // move the instructions into the newly created exit block. If there are no
12103 // instructions, add an explicit branch to the FallThrough block and then
12104 // split.
12105 //
12106 // The split is required for two reasons:
12107 // 1) A terminator(t2WhileLoopStart) will be placed at that site.
12108 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12109 // need to be updated. splitAt() already handles this.
12110 TpExit = BB->splitAt(MI, false);
12111 if (TpExit == BB) {
12112 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12113 "block containing memcpy/memset Pseudo");
12114 TpExit = BB->getFallThrough();
12115 BuildMI(BB, dl, TII->get(ARM::t2B))
12116 .addMBB(TpExit)
12118 TpExit = BB->splitAt(MI, false);
12119 }
12120
12121 // Add logic for iteration count
12122 Register TotalIterationsReg =
12123 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12124
12125 // Add the vectorized (and predicated) loads/store instructions
12126 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12127 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12128 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12129
12130 // Required to avoid conflict with the MachineVerifier during testing.
12131 Properties.reset(MachineFunctionProperties::Property::NoPHIs);
12132
12133 // Connect the blocks
12134 TpEntry->addSuccessor(TpLoopBody);
12135 TpLoopBody->addSuccessor(TpLoopBody);
12136 TpLoopBody->addSuccessor(TpExit);
12137
12138 // Reorder for a more natural layout
12139 TpLoopBody->moveAfter(TpEntry);
12140 TpExit->moveAfter(TpLoopBody);
12141
12142 // Finally, remove the memcpy Pseudo Instruction
12143 MI.eraseFromParent();
12144
12145 // Return the exit block as it may contain other instructions requiring a
12146 // custom inserter
12147 return TpExit;
12148 }
12149
12150 // The Thumb2 pre-indexed stores have the same MI operands; they are just
12151 // defined differently in the .td files than in the isel patterns, so
12152 // they need pseudos.
12153 case ARM::t2STR_preidx:
12154 MI.setDesc(TII->get(ARM::t2STR_PRE));
12155 return BB;
12156 case ARM::t2STRB_preidx:
12157 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12158 return BB;
12159 case ARM::t2STRH_preidx:
12160 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12161 return BB;
12162
12163 case ARM::STRi_preidx:
12164 case ARM::STRBi_preidx: {
12165 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12166 : ARM::STRB_PRE_IMM;
12167 // Decode the offset.
12168 unsigned Offset = MI.getOperand(4).getImm();
12169 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12171 if (isSub)
12172 Offset = -Offset;
12173
12174 MachineMemOperand *MMO = *MI.memoperands_begin();
12175 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12176 .add(MI.getOperand(0)) // Rn_wb
12177 .add(MI.getOperand(1)) // Rt
12178 .add(MI.getOperand(2)) // Rn
12179 .addImm(Offset) // offset (skip GPR==zero_reg)
12180 .add(MI.getOperand(5)) // pred
12181 .add(MI.getOperand(6))
12182 .addMemOperand(MMO);
12183 MI.eraseFromParent();
12184 return BB;
12185 }
12186 case ARM::STRr_preidx:
12187 case ARM::STRBr_preidx:
12188 case ARM::STRH_preidx: {
12189 unsigned NewOpc;
12190 switch (MI.getOpcode()) {
12191 default: llvm_unreachable("unexpected opcode!");
12192 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12193 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12194 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12195 }
12196 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12197 for (const MachineOperand &MO : MI.operands())
12198 MIB.add(MO);
12199 MI.eraseFromParent();
12200 return BB;
12201 }
12202
12203 case ARM::tMOVCCr_pseudo: {
12204 // To "insert" a SELECT_CC instruction, we actually have to insert the
12205 // diamond control-flow pattern. The incoming instruction knows the
12206 // destination vreg to set, the condition code register to branch on, the
12207 // true/false values to select between, and a branch opcode to use.
12208 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12210
12211 // thisMBB:
12212 // ...
12213 // TrueVal = ...
12214 // cmpTY ccX, r1, r2
12215 // bCC copy1MBB
12216 // fallthrough --> copy0MBB
12217 MachineBasicBlock *thisMBB = BB;
12218 MachineFunction *F = BB->getParent();
12219 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12220 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12221 F->insert(It, copy0MBB);
12222 F->insert(It, sinkMBB);
12223
12224 // Set the call frame size on entry to the new basic blocks.
12225 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12226 copy0MBB->setCallFrameSize(CallFrameSize);
12227 sinkMBB->setCallFrameSize(CallFrameSize);
12228
12229 // Check whether CPSR is live past the tMOVCCr_pseudo.
12230 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12231 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12232 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12233 copy0MBB->addLiveIn(ARM::CPSR);
12234 sinkMBB->addLiveIn(ARM::CPSR);
12235 }
12236
12237 // Transfer the remainder of BB and its successor edges to sinkMBB.
12238 sinkMBB->splice(sinkMBB->begin(), BB,
12239 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12241
12242 BB->addSuccessor(copy0MBB);
12243 BB->addSuccessor(sinkMBB);
12244
12245 BuildMI(BB, dl, TII->get(ARM::tBcc))
12246 .addMBB(sinkMBB)
12247 .addImm(MI.getOperand(3).getImm())
12248 .addReg(MI.getOperand(4).getReg());
12249
12250 // copy0MBB:
12251 // %FalseValue = ...
12252 // # fallthrough to sinkMBB
12253 BB = copy0MBB;
12254
12255 // Update machine-CFG edges
12256 BB->addSuccessor(sinkMBB);
12257
12258 // sinkMBB:
12259 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12260 // ...
12261 BB = sinkMBB;
12262 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12263 .addReg(MI.getOperand(1).getReg())
12264 .addMBB(copy0MBB)
12265 .addReg(MI.getOperand(2).getReg())
12266 .addMBB(thisMBB);
12267
12268 MI.eraseFromParent(); // The pseudo instruction is gone now.
12269 return BB;
12270 }
12271
12272 case ARM::BCCi64:
12273 case ARM::BCCZi64: {
12274 // If there is an unconditional branch to the other successor, remove it.
12275 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12276
12277 // Compare both parts that make up the double comparison separately for
12278 // equality.
12279 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12280
12281 Register LHS1 = MI.getOperand(1).getReg();
12282 Register LHS2 = MI.getOperand(2).getReg();
12283 if (RHSisZero) {
12284 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12285 .addReg(LHS1)
12286 .addImm(0)
12288 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12289 .addReg(LHS2).addImm(0)
12290 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12291 } else {
12292 Register RHS1 = MI.getOperand(3).getReg();
12293 Register RHS2 = MI.getOperand(4).getReg();
12294 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12295 .addReg(LHS1)
12296 .addReg(RHS1)
12298 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12299 .addReg(LHS2).addReg(RHS2)
12300 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12301 }
12302
12303 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12304 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12305 if (MI.getOperand(0).getImm() == ARMCC::NE)
12306 std::swap(destMBB, exitMBB);
12307
12308 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12309 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12310 if (isThumb2)
12311 BuildMI(BB, dl, TII->get(ARM::t2B))
12312 .addMBB(exitMBB)
12314 else
12315 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12316
12317 MI.eraseFromParent(); // The pseudo instruction is gone now.
12318 return BB;
12319 }
12320
12321 case ARM::Int_eh_sjlj_setjmp:
12322 case ARM::Int_eh_sjlj_setjmp_nofp:
12323 case ARM::tInt_eh_sjlj_setjmp:
12324 case ARM::t2Int_eh_sjlj_setjmp:
12325 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12326 return BB;
12327
12328 case ARM::Int_eh_sjlj_setup_dispatch:
12329 EmitSjLjDispatchBlock(MI, BB);
12330 return BB;
12331
12332 case ARM::ABS:
12333 case ARM::t2ABS: {
12334 // To insert an ABS instruction, we have to insert the
12335 // diamond control-flow pattern. The incoming instruction knows the
12336 // source vreg to test against 0, the destination vreg to set,
12337 // the condition code register to branch on, the
12338 // true/false values to select between, and a branch opcode to use.
12339 // It transforms
12340 // V1 = ABS V0
12341 // into
12342 // V2 = MOVS V0
12343 // BCC (branch to SinkBB if V0 >= 0)
12344 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12345 // SinkBB: V1 = PHI(V2, V3)
12346 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12348 MachineFunction *Fn = BB->getParent();
12349 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12350 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12351 Fn->insert(BBI, RSBBB);
12352 Fn->insert(BBI, SinkBB);
12353
12354 Register ABSSrcReg = MI.getOperand(1).getReg();
12355 Register ABSDstReg = MI.getOperand(0).getReg();
12356 bool ABSSrcKill = MI.getOperand(1).isKill();
12357 bool isThumb2 = Subtarget->isThumb2();
12359 // In Thumb mode S must not be specified if the source register is SP or
12360 // PC and if the destination register is SP, so restrict the register class
12361 Register NewRsbDstReg = MRI.createVirtualRegister(
12362 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12363
12364 // Transfer the remainder of BB and its successor edges to sinkMBB.
12365 SinkBB->splice(SinkBB->begin(), BB,
12366 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12368
12369 BB->addSuccessor(RSBBB);
12370 BB->addSuccessor(SinkBB);
12371
12372 // fall through to SinkMBB
12373 RSBBB->addSuccessor(SinkBB);
12374
12375 // insert a cmp at the end of BB
12376 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12377 .addReg(ABSSrcReg)
12378 .addImm(0)
12380
12381 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12382 BuildMI(BB, dl,
12383 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12385
12386 // insert rsbri in RSBBB
12387 // Note: BCC and rsbri will be converted into predicated rsbmi
12388 // by if-conversion pass
12389 BuildMI(*RSBBB, RSBBB->begin(), dl,
12390 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12391 .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
12392 .addImm(0)
12394 .add(condCodeOp());
12395
12396 // insert PHI in SinkBB,
12397 // reuse ABSDstReg to not change uses of ABS instruction
12398 BuildMI(*SinkBB, SinkBB->begin(), dl,
12399 TII->get(ARM::PHI), ABSDstReg)
12400 .addReg(NewRsbDstReg).addMBB(RSBBB)
12401 .addReg(ABSSrcReg).addMBB(BB);
12402
12403 // remove ABS instruction
12404 MI.eraseFromParent();
12405
12406 // return last added BB
12407 return SinkBB;
12408 }
12409 case ARM::COPY_STRUCT_BYVAL_I32:
12410 ++NumLoopByVals;
12411 return EmitStructByval(MI, BB);
12412 case ARM::WIN__CHKSTK:
12413 return EmitLowered__chkstk(MI, BB);
12414 case ARM::WIN__DBZCHK:
12415 return EmitLowered__dbzchk(MI, BB);
12416 }
12417}
12418
12419/// Attaches vregs to MEMCPY that it will use as scratch registers
12420/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12421/// instead of as a custom inserter because we need the use list from the SDNode.
12422static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12423 MachineInstr &MI, const SDNode *Node) {
12424 bool isThumb1 = Subtarget->isThumb1Only();
12425
12426 DebugLoc DL = MI.getDebugLoc();
12427 MachineFunction *MF = MI.getParent()->getParent();
12428 MachineRegisterInfo &MRI = MF->getRegInfo();
12429 MachineInstrBuilder MIB(*MF, MI);
12430
12431 // If the new dst/src is unused mark it as dead.
12432 if (!Node->hasAnyUseOfValue(0)) {
12433 MI.getOperand(0).setIsDead(true);
12434 }
12435 if (!Node->hasAnyUseOfValue(1)) {
12436 MI.getOperand(1).setIsDead(true);
12437 }
12438
12439 // The MEMCPY both defines and kills the scratch registers.
12440 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12441 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12442 : &ARM::GPRRegClass);
12444 }
12445}
12446
12447void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12448 SDNode *Node) const {
12449 if (MI.getOpcode() == ARM::MEMCPY) {
12450 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12451 return;
12452 }
12453
12454 const MCInstrDesc *MCID = &MI.getDesc();
12455 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12456 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12457 // operand is still set to noreg. If needed, set the optional operand's
12458 // register to CPSR, and remove the redundant implicit def.
12459 //
12460 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12461
12462 // Rename pseudo opcodes.
12463 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12464 unsigned ccOutIdx;
12465 if (NewOpc) {
12466 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12467 MCID = &TII->get(NewOpc);
12468
12469 assert(MCID->getNumOperands() ==
12470 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12471 && "converted opcode should be the same except for cc_out"
12472 " (and, on Thumb1, pred)");
12473
12474 MI.setDesc(*MCID);
12475
12476 // Add the optional cc_out operand
12477 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12478
12479 // On Thumb1, move all input operands to the end, then add the predicate
12480 if (Subtarget->isThumb1Only()) {
12481 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12482 MI.addOperand(MI.getOperand(1));
12483 MI.removeOperand(1);
12484 }
12485
12486 // Restore the ties
12487 for (unsigned i = MI.getNumOperands(); i--;) {
12488 const MachineOperand& op = MI.getOperand(i);
12489 if (op.isReg() && op.isUse()) {
12490 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12491 if (DefIdx != -1)
12492 MI.tieOperands(DefIdx, i);
12493 }
12494 }
12495
12497 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12498 ccOutIdx = 1;
12499 } else
12500 ccOutIdx = MCID->getNumOperands() - 1;
12501 } else
12502 ccOutIdx = MCID->getNumOperands() - 1;
12503
12504 // Any ARM instruction that sets the 's' bit should specify an optional
12505 // "cc_out" operand in the last operand position.
12506 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12507 assert(!NewOpc && "Optional cc_out operand required");
12508 return;
12509 }
12510 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12511 // since we already have an optional CPSR def.
12512 bool definesCPSR = false;
12513 bool deadCPSR = false;
12514 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12515 ++i) {
12516 const MachineOperand &MO = MI.getOperand(i);
12517 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12518 definesCPSR = true;
12519 if (MO.isDead())
12520 deadCPSR = true;
12521 MI.removeOperand(i);
12522 break;
12523 }
12524 }
12525 if (!definesCPSR) {
12526 assert(!NewOpc && "Optional cc_out operand required");
12527 return;
12528 }
12529 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12530 if (deadCPSR) {
12531 assert(!MI.getOperand(ccOutIdx).getReg() &&
12532 "expect uninitialized optional cc_out operand");
12533 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12534 if (!Subtarget->isThumb1Only())
12535 return;
12536 }
12537
12538 // If this instruction was defined with an optional CPSR def and its dag node
12539 // had a live implicit CPSR def, then activate the optional CPSR def.
12540 MachineOperand &MO = MI.getOperand(ccOutIdx);
12541 MO.setReg(ARM::CPSR);
12542 MO.setIsDef(true);
12543}
12544
12545//===----------------------------------------------------------------------===//
12546// ARM Optimization Hooks
12547//===----------------------------------------------------------------------===//
12548
12549// Helper function that checks if N is a null or all ones constant.
12550static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12551 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12552}
12553
12554// Return true if N is conditionally 0 or all ones.
12555// Detects these expressions where cc is an i1 value:
12556//
12557// (select cc 0, y) [AllOnes=0]
12558// (select cc y, 0) [AllOnes=0]
12559// (zext cc) [AllOnes=0]
12560// (sext cc) [AllOnes=0/1]
12561// (select cc -1, y) [AllOnes=1]
12562// (select cc y, -1) [AllOnes=1]
12563//
12564// Invert is set when N is the null/all ones constant when CC is false.
12565// OtherOp is set to the alternative value of N.
12567 SDValue &CC, bool &Invert,
12568 SDValue &OtherOp,
12569 SelectionDAG &DAG) {
12570 switch (N->getOpcode()) {
12571 default: return false;
12572 case ISD::SELECT: {
12573 CC = N->getOperand(0);
12574 SDValue N1 = N->getOperand(1);
12575 SDValue N2 = N->getOperand(2);
12576 if (isZeroOrAllOnes(N1, AllOnes)) {
12577 Invert = false;
12578 OtherOp = N2;
12579 return true;
12580 }
12581 if (isZeroOrAllOnes(N2, AllOnes)) {
12582 Invert = true;
12583 OtherOp = N1;
12584 return true;
12585 }
12586 return false;
12587 }
12588 case ISD::ZERO_EXTEND:
12589 // (zext cc) can never be the all ones value.
12590 if (AllOnes)
12591 return false;
12592 [[fallthrough]];
12593 case ISD::SIGN_EXTEND: {
12594 SDLoc dl(N);
12595 EVT VT = N->getValueType(0);
12596 CC = N->getOperand(0);
12597 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12598 return false;
12599 Invert = !AllOnes;
12600 if (AllOnes)
12601 // When looking for an AllOnes constant, N is an sext, and the 'other'
12602 // value is 0.
12603 OtherOp = DAG.getConstant(0, dl, VT);
12604 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12605 // When looking for a 0 constant, N can be zext or sext.
12606 OtherOp = DAG.getConstant(1, dl, VT);
12607 else
12608 OtherOp = DAG.getAllOnesConstant(dl, VT);
12609 return true;
12610 }
12611 }
12612}
12613
12614// Combine a constant select operand into its use:
12615//
12616// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12617// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12618// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12619// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12620// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12621//
12622// The transform is rejected if the select doesn't have a constant operand that
12623// is null, or all ones when AllOnes is set.
12624//
12625// Also recognize sext/zext from i1:
12626//
12627// (add (zext cc), x) -> (select cc (add x, 1), x)
12628// (add (sext cc), x) -> (select cc (add x, -1), x)
12629//
12630// These transformations eventually create predicated instructions.
12631//
12632// @param N The node to transform.
12633// @param Slct The N operand that is a select.
12634// @param OtherOp The other N operand (x above).
12635// @param DCI Context.
12636// @param AllOnes Require the select constant to be all ones instead of null.
12637// @returns The new node, or SDValue() on failure.
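// For instance, a concrete instance of the first pattern above:
//
//   (add (select cc, 0, 7), x) -> (select cc, x, (add x, 7))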
12638static
12639SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12640 TargetLowering::DAGCombinerInfo &DCI,
12641 bool AllOnes = false) {
12642 SelectionDAG &DAG = DCI.DAG;
12643 EVT VT = N->getValueType(0);
12644 SDValue NonConstantVal;
12645 SDValue CCOp;
12646 bool SwapSelectOps;
12647 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12648 NonConstantVal, DAG))
12649 return SDValue();
12650
12651 // Slct is now known to be the desired identity constant when CC is true.
12652 SDValue TrueVal = OtherOp;
12653 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12654 OtherOp, NonConstantVal);
12655 // Unless SwapSelectOps says CC should be false.
12656 if (SwapSelectOps)
12657 std::swap(TrueVal, FalseVal);
12658
12659 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12660 CCOp, TrueVal, FalseVal);
12661}
12662
12663// Attempt combineSelectAndUse on each operand of a commutative operator N.
12664static
12665SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12666 TargetLowering::DAGCombinerInfo &DCI) {
12667 SDValue N0 = N->getOperand(0);
12668 SDValue N1 = N->getOperand(1);
12669 if (N0.getNode()->hasOneUse())
12670 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12671 return Result;
12672 if (N1.getNode()->hasOneUse())
12673 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12674 return Result;
12675 return SDValue();
12676}
12677
12678static bool IsVUZPShuffleNode(SDNode *N) {
12679 // VUZP shuffle node.
12680 if (N->getOpcode() == ARMISD::VUZP)
12681 return true;
12682
12683 // "VUZP" on i32 is an alias for VTRN.
12684 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12685 return true;
12686
12687 return false;
12688}
12689
12690static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12691 TargetLowering::DAGCombinerInfo &DCI,
12692 const ARMSubtarget *Subtarget) {
12693 // Look for ADD(VUZP.0, VUZP.1).
12694 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12695 N0 == N1)
12696 return SDValue();
12697
12698 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12699 if (!N->getValueType(0).is64BitVector())
12700 return SDValue();
12701
12702 // Generate vpadd.
12703 SelectionDAG &DAG = DCI.DAG;
12704 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12705 SDLoc dl(N);
12706 SDNode *Unzip = N0.getNode();
12707 EVT VT = N->getValueType(0);
12708
12710 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12711 TLI.getPointerTy(DAG.getDataLayout())));
12712 Ops.push_back(Unzip->getOperand(0));
12713 Ops.push_back(Unzip->getOperand(1));
12714
12715 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12716}
12717
12718static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12719 TargetLowering::DAGCombinerInfo &DCI,
12720 const ARMSubtarget *Subtarget) {
12721 // Check for two extended operands.
12722 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12723 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12724 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12725 N1.getOpcode() == ISD::ZERO_EXTEND))
12726 return SDValue();
12727
12728 SDValue N00 = N0.getOperand(0);
12729 SDValue N10 = N1.getOperand(0);
12730
12731 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12732 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12733 N00 == N10)
12734 return SDValue();
12735
12736 // We only recognize Q register paddl here; this can't be reached until
12737 // after type legalization.
12738 if (!N00.getValueType().is64BitVector() ||
12740 return SDValue();
12741
12742 // Generate vpaddl.
12743 SelectionDAG &DAG = DCI.DAG;
12744 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12745 SDLoc dl(N);
12746 EVT VT = N->getValueType(0);
12747
12749 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12750 unsigned Opcode;
12751 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12752 Opcode = Intrinsic::arm_neon_vpaddls;
12753 else
12754 Opcode = Intrinsic::arm_neon_vpaddlu;
12755 Ops.push_back(DAG.getConstant(Opcode, dl,
12756 TLI.getPointerTy(DAG.getDataLayout())));
12757 EVT ElemTy = N00.getValueType().getVectorElementType();
12758 unsigned NumElts = VT.getVectorNumElements();
12759 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12760 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12761 N00.getOperand(0), N00.getOperand(1));
12762 Ops.push_back(Concat);
12763
12764 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12765}
12766
12767// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12768// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12769// much easier to match.
12770static SDValue
12771AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12772 TargetLowering::DAGCombinerInfo &DCI,
12773 const ARMSubtarget *Subtarget) {
12774 // Only perform this optimization after legalization, and only if NEON is
12775 // available. We also expect both operands to be BUILD_VECTORs.
12776 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12777 || N0.getOpcode() != ISD::BUILD_VECTOR
12778 || N1.getOpcode() != ISD::BUILD_VECTOR)
12779 return SDValue();
12780
12781 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12782 EVT VT = N->getValueType(0);
12783 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12784 return SDValue();
12785
12786 // Check that the vector operands are of the right form.
12787 // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR
12788 // operands, where N is the size of the formed vector.
12789 // Each EXTRACT_VECTOR should have the same input vector and an odd or even
12790 // index such that we have a pairwise add pattern.
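// Schematically, the pattern being matched is
//   N0 = BUILD_VECTOR (extract_vector_elt V, 0), (extract_vector_elt V, 2), ...
//   N1 = BUILD_VECTOR (extract_vector_elt V, 1), (extract_vector_elt V, 3), ...
// so that N0 + N1 is a pairwise add of V, i.e. a vpaddl.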
12791
12792 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12794 return SDValue();
12795 SDValue Vec = N0->getOperand(0)->getOperand(0);
12796 SDNode *V = Vec.getNode();
12797 unsigned nextIndex = 0;
12798
12799 // For each operands to the ADD which are BUILD_VECTORs,
12800 // check to see if each of their operands are an EXTRACT_VECTOR with
12801 // the same vector and appropriate index.
12802 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12805
12806 SDValue ExtVec0 = N0->getOperand(i);
12807 SDValue ExtVec1 = N1->getOperand(i);
12808
12809 // First operand is the vector; verify it is the same.
12810 if (V != ExtVec0->getOperand(0).getNode() ||
12811 V != ExtVec1->getOperand(0).getNode())
12812 return SDValue();
12813
12814 // Second is the constant; verify it is correct.
12815 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12816 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12817
12818 // For the constant, we want to see all the even or all the odd.
12819 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12820 || C1->getZExtValue() != nextIndex+1)
12821 return SDValue();
12822
12823 // Increment index.
12824 nextIndex+=2;
12825 } else
12826 return SDValue();
12827 }
12828
12829 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12830 // we're using the entire input vector, otherwise there's a size/legality
12831 // mismatch somewhere.
12832 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12834 return SDValue();
12835
12836 // Create VPADDL node.
12837 SelectionDAG &DAG = DCI.DAG;
12838 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12839
12840 SDLoc dl(N);
12841
12842 // Build operand list.
12844 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12845 TLI.getPointerTy(DAG.getDataLayout())));
12846
12847 // Input is the vector.
12848 Ops.push_back(Vec);
12849
12850 // Get widened type and narrowed type.
12851 MVT widenType;
12852 unsigned numElem = VT.getVectorNumElements();
12853
12854 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12855 switch (inputLaneType.getSimpleVT().SimpleTy) {
12856 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12857 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12858 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12859 default:
12860 llvm_unreachable("Invalid vector element type for padd optimization.");
12861 }
12862
12863 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12864 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12865 return DAG.getNode(ExtOp, dl, VT, tmp);
12866}
12867
12868static SDValue findMUL_LOHI(SDValue V) {
12869 if (V->getOpcode() == ISD::UMUL_LOHI ||
12870 V->getOpcode() == ISD::SMUL_LOHI)
12871 return V;
12872 return SDValue();
12873}
12874
12875static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12876 TargetLowering::DAGCombinerInfo &DCI,
12877 const ARMSubtarget *Subtarget) {
12878 if (!Subtarget->hasBaseDSP())
12879 return SDValue();
12880
12881 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12882 // accumulate the product into a 64-bit value. The 16-bit values will
12883 // be sign-extended somehow or SRA'd into 32-bit values
12884 // (addc (adde (mul 16bit, 16bit), lo), hi)
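// For instance, SMLALBB RdLo, RdHi, Rn, Rm computes
//   RdHi:RdLo += sext(Rn[15:0]) * sext(Rm[15:0]),
// and the B/T suffixes select the bottom or top halfword of each operand.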
12885 SDValue Mul = AddcNode->getOperand(0);
12886 SDValue Lo = AddcNode->getOperand(1);
12887 if (Mul.getOpcode() != ISD::MUL) {
12888 Lo = AddcNode->getOperand(0);
12889 Mul = AddcNode->getOperand(1);
12890 if (Mul.getOpcode() != ISD::MUL)
12891 return SDValue();
12892 }
12893
12894 SDValue SRA = AddeNode->getOperand(0);
12895 SDValue Hi = AddeNode->getOperand(1);
12896 if (SRA.getOpcode() != ISD::SRA) {
12897 SRA = AddeNode->getOperand(1);
12898 Hi = AddeNode->getOperand(0);
12899 if (SRA.getOpcode() != ISD::SRA)
12900 return SDValue();
12901 }
12902 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12903 if (Const->getZExtValue() != 31)
12904 return SDValue();
12905 } else
12906 return SDValue();
12907
12908 if (SRA.getOperand(0) != Mul)
12909 return SDValue();
12910
12911 SelectionDAG &DAG = DCI.DAG;
12912 SDLoc dl(AddcNode);
12913 unsigned Opcode = 0;
12914 SDValue Op0;
12915 SDValue Op1;
12916
12917 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12918 Opcode = ARMISD::SMLALBB;
12919 Op0 = Mul.getOperand(0);
12920 Op1 = Mul.getOperand(1);
12921 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12922 Opcode = ARMISD::SMLALBT;
12923 Op0 = Mul.getOperand(0);
12924 Op1 = Mul.getOperand(1).getOperand(0);
12925 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12926 Opcode = ARMISD::SMLALTB;
12927 Op0 = Mul.getOperand(0).getOperand(0);
12928 Op1 = Mul.getOperand(1);
12929 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12930 Opcode = ARMISD::SMLALTT;
12931 Op0 = Mul->getOperand(0).getOperand(0);
12932 Op1 = Mul->getOperand(1).getOperand(0);
12933 }
12934
12935 if (!Op0 || !Op1)
12936 return SDValue();
12937
12938 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12939 Op0, Op1, Lo, Hi);
12940 // Replace the uses of the ADD nodes with the SMLAL node's values.
12941 SDValue HiMLALResult(SMLAL.getNode(), 1);
12942 SDValue LoMLALResult(SMLAL.getNode(), 0);
12943
12944 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12945 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12946
12947 // Return original node to notify the driver to stop replacing.
12948 SDValue resNode(AddcNode, 0);
12949 return resNode;
12950}
12951
12952static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12953 TargetLowering::DAGCombinerInfo &DCI,
12954 const ARMSubtarget *Subtarget) {
12955 // Look for multiply add opportunities.
12956 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
12957 // each add node consumes a value from ISD::UMUL_LOHI and there is
12958 // a glue link from the first add to the second add.
12959 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12960 // a S/UMLAL instruction.
12961 //                 UMUL_LOHI
12962 //                / :lo    \ :hi
12963 //               V          \          [no multiline comment]
12964 //  loAdd ->  ADDC   |
12965 //               \ :carry  /
12966 //                V        V
12967 //                  ADDE <- hiAdd
12968 //
12969 // In the special case where only the higher part of a signed result is used
12970 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12971 // a constant with the exact value of 0x80000000, we recognize we are dealing
12972 // with a "rounded multiply and add" (or subtract) and transform it into
12973 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
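// In other words, SMMLAR Rd, Rn, Rm, Ra computes
//   Rd = Ra + ((Rn * Rm + 0x80000000) >> 32),
// which is why the low-part addend must be exactly 0x80000000 here.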
12974
12975 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12976 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12977 "Expect an ADDE or SUBE");
12978
12979 assert(AddeSubeNode->getNumOperands() == 3 &&
12980 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12981 "ADDE node has the wrong inputs");
12982
12983 // Check that we are chained to the right ADDC or SUBC node.
12984 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12985 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12986 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12987 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12988 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12989 return SDValue();
12990
12991 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12992 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12993
12994 // Check if the two operands are from the same mul_lohi node.
12995 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12996 return SDValue();
12997
12998 assert(AddcSubcNode->getNumValues() == 2 &&
12999 AddcSubcNode->getValueType(0) == MVT::i32 &&
13000 "Expect ADDC with two result values. First: i32");
13001
13002 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
13003 // may be an SMLAL which multiplies two 16-bit values.
13004 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
13005 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
13006 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
13007 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
13008 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
13009 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
13010
13011 // Check for the triangle shape.
13012 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
13013 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
13014
13015 // Make sure that the ADDE/SUBE operands are not coming from the same node.
13016 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
13017 return SDValue();
13018
13019 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
13020 bool IsLeftOperandMUL = false;
13021 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
13022 if (MULOp == SDValue())
13023 MULOp = findMUL_LOHI(AddeSubeOp1);
13024 else
13025 IsLeftOperandMUL = true;
13026 if (MULOp == SDValue())
13027 return SDValue();
13028
13029 // Figure out the right opcode.
13030 unsigned Opc = MULOp->getOpcode();
13031 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
13032
13033 // Figure out the high and low input values to the MLAL node.
13034 SDValue *HiAddSub = nullptr;
13035 SDValue *LoMul = nullptr;
13036 SDValue *LowAddSub = nullptr;
13037
13038 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
13039 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
13040 return SDValue();
13041
13042 if (IsLeftOperandMUL)
13043 HiAddSub = &AddeSubeOp1;
13044 else
13045 HiAddSub = &AddeSubeOp0;
13046
13047 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
13048 // whose low result is fed to the ADDC/SUBC we are checking.
13049
13050 if (AddcSubcOp0 == MULOp.getValue(0)) {
13051 LoMul = &AddcSubcOp0;
13052 LowAddSub = &AddcSubcOp1;
13053 }
13054 if (AddcSubcOp1 == MULOp.getValue(0)) {
13055 LoMul = &AddcSubcOp1;
13056 LowAddSub = &AddcSubcOp0;
13057 }
13058
13059 if (!LoMul)
13060 return SDValue();
13061
13062 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
13063 // the replacement below will create a cycle.
13064 if (AddcSubcNode == HiAddSub->getNode() ||
13065 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13066 return SDValue();
13067
13068 // Create the merged node.
13069 SelectionDAG &DAG = DCI.DAG;
13070
13071 // Start building the operand list.
13072 SmallVector<SDValue, 8> Ops;
13073 Ops.push_back(LoMul->getOperand(0));
13074 Ops.push_back(LoMul->getOperand(1));
13075
13076 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13077 // the case, we must be doing signed multiplication and only use the higher
13078 // part of the result of the MLAL; furthermore, LowAddSub must be a constant
13079 // addition or subtraction with the value 0x80000000.
13080 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13081 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13082 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13083 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13084 0x80000000) {
13085 Ops.push_back(*HiAddSub);
13086 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13087 FinalOpc = ARMISD::SMMLSR;
13088 } else {
13089 FinalOpc = ARMISD::SMMLAR;
13090 }
13091 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13092 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13093
13094 return SDValue(AddeSubeNode, 0);
13095 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13096 // SMMLS is generated during instruction selection and the rest of this
13097 // function can not handle the case where AddcSubcNode is a SUBC.
13098 return SDValue();
13099
13100 // Finish building the operand list for {U/S}MLAL
13101 Ops.push_back(*LowAddSub);
13102 Ops.push_back(*HiAddSub);
13103
13104 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13105 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13106
13107 // Replace the uses of the ADDE and ADDC nodes with the MLAL node's values.
13108 SDValue HiMLALResult(MLALNode.getNode(), 1);
13109 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13110
13111 SDValue LoMLALResult(MLALNode.getNode(), 0);
13112 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13113
13114 // Return original node to notify the driver to stop replacing.
13115 return SDValue(AddeSubeNode, 0);
13116}
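// Illustrative aside, not part of the original source: a minimal standalone
// model of the arithmetic this combine relies on, assuming ordinary two's
// complement 32/64-bit integers. The MLAL form is a full 64-bit
// multiply-accumulate; the SMMLAR special case keeps only the high word after
// adding the rounding constant 0x80000000 to the low word.
#include <cassert>
#include <cstdint>

static int64_t smlal_model(int32_t a, int32_t b, int64_t acc) {
  return (int64_t)a * (int64_t)b + acc;              // 64-bit multiply-accumulate
}

static int32_t smmlar_model(int32_t a, int32_t b, int32_t acc) {
  // Bits [63:32] of acc:0 + a*b + 0x80000000 (round-to-nearest of the high word).
  uint64_t prod = (uint64_t)((int64_t)a * (int64_t)b);
  uint64_t sum = prod + ((uint64_t)(uint32_t)acc << 32) + 0x80000000u;
  return (int32_t)(sum >> 32);
}

int main() {
  assert(smlal_model(3, 4, 5) == 17);
  assert(smmlar_model(1 << 30, 4, 0) == 1);          // (2^32 + bias) >> 32 == 1
  return 0;
}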
13117
13118static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13119 TargetLowering::DAGCombinerInfo &DCI,
13120 const ARMSubtarget *Subtarget) {
13121 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13122 // While trying to combine for the other MLAL nodes, first search for the
13123 // chance to use UMAAL. Check if Addc uses a node which has already
13124 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13125 // as the addend, and it's handled in PerformUMLALCombine.
13126
13127 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13128 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13129
13130 // Check that we have a glued ADDC node.
13131 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13132 if (AddcNode->getOpcode() != ARMISD::ADDC)
13133 return SDValue();
13134
13135 // Find the converted UMAAL or quit if it doesn't exist.
13136 SDNode *UmlalNode = nullptr;
13137 SDValue AddHi;
13138 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13139 UmlalNode = AddcNode->getOperand(0).getNode();
13140 AddHi = AddcNode->getOperand(1);
13141 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13142 UmlalNode = AddcNode->getOperand(1).getNode();
13143 AddHi = AddcNode->getOperand(0);
13144 } else {
13145 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13146 }
13147
13148 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13149 // the ADDC as well as Zero.
13150 if (!isNullConstant(UmlalNode->getOperand(3)))
13151 return SDValue();
13152
13153 if ((isNullConstant(AddeNode->getOperand(0)) &&
13154 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13155 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13156 isNullConstant(AddeNode->getOperand(1)))) {
13157 SelectionDAG &DAG = DCI.DAG;
13158 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13159 UmlalNode->getOperand(2), AddHi };
13160 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13161 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13162
13163 // Replace the uses of the ADDE and ADDC nodes with the UMAAL node's values.
13164 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13165 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13166
13167 // Return original node to notify the driver to stop replacing.
13168 return SDValue(AddeNode, 0);
13169 }
13170 return SDValue();
13171}
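// Illustrative aside, not part of the original source: UMAAL computes
// Rn*Rm + AddLo + AddHi as a full 64-bit unsigned value, and the sum can never
// overflow 64 bits, since (2^32-1)^2 + 2*(2^32-1) == 2^64-1 exactly. A quick
// standalone check of that claim:
#include <cassert>
#include <cstdint>

static uint64_t umaal_model(uint32_t n, uint32_t m, uint32_t lo, uint32_t hi) {
  return (uint64_t)n * m + lo + hi;  // fits in 64 bits for every input
}

int main() {
  assert(umaal_model(UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX) ==
         UINT64_MAX);
  assert(umaal_model(2, 3, 4, 5) == 15);
  return 0;
}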
13172
13173static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13174 const ARMSubtarget *Subtarget) {
13175 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13176 return SDValue();
13177
13178 // Check that we have a pair of ADDC and ADDE as operands.
13179 // Both addends of the ADDE must be zero.
13180 SDNode* AddcNode = N->getOperand(2).getNode();
13181 SDNode* AddeNode = N->getOperand(3).getNode();
13182 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13183 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13184 isNullConstant(AddeNode->getOperand(0)) &&
13185 isNullConstant(AddeNode->getOperand(1)) &&
13186 (AddeNode->getOperand(2).getNode() == AddcNode))
13187 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13188 DAG.getVTList(MVT::i32, MVT::i32),
13189 {N->getOperand(0), N->getOperand(1),
13190 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13191 else
13192 return SDValue();
13193}
13194
13197 const ARMSubtarget *Subtarget) {
13198 SelectionDAG &DAG(DCI.DAG);
13199
13200 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13201 // (SUBC (ADDE 0, 0, C), 1) -> C
13202 SDValue LHS = N->getOperand(0);
13203 SDValue RHS = N->getOperand(1);
13204 if (LHS->getOpcode() == ARMISD::ADDE &&
13205 isNullConstant(LHS->getOperand(0)) &&
13206 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13207 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13208 }
13209 }
13210
13211 if (Subtarget->isThumb1Only()) {
13212 SDValue RHS = N->getOperand(1);
13213 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13214 int32_t imm = C->getSExtValue();
13215 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13216 SDLoc DL(N);
13217 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13218 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13219 : ARMISD::ADDC;
13220 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13221 }
13222 }
13223 }
13224
13225 return SDValue();
13226}
13227
13228static SDValue PerformAddeSubeCombine(SDNode *N,
13229 TargetLowering::DAGCombinerInfo &DCI,
13230 const ARMSubtarget *Subtarget) {
13231 if (Subtarget->isThumb1Only()) {
13232 SelectionDAG &DAG = DCI.DAG;
13233 SDValue RHS = N->getOperand(1);
13234 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13235 int64_t imm = C->getSExtValue();
13236 if (imm < 0) {
13237 SDLoc DL(N);
13238
13239 // The with-carry-in form matches bitwise not instead of the negation.
13240 // Effectively, the inverse interpretation of the carry flag already
13241 // accounts for part of the negation.
13242 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13243
13244 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13245 : ARMISD::ADDE;
13246 return DAG.getNode(Opcode, DL, N->getVTList(),
13247 N->getOperand(0), RHS, N->getOperand(2));
13248 }
13249 }
13250 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13251 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13252 }
13253 return SDValue();
13254}
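// Illustrative aside, not part of the original source: why the carry-in form
// above uses bitwise NOT rather than negation. Modulo 2^32, ~imm == -imm - 1,
// and a subtract-with-carry computes a - b - (1 - carry), so adding a negative
// immediate with carry-in is the same as subtracting its bitwise complement:
#include <cassert>
#include <cstdint>

static uint32_t adc(uint32_t a, uint32_t b, unsigned c) { return a + b + c; }
static uint32_t sbc(uint32_t a, uint32_t b, unsigned c) { return a - b - (1 - c); }

int main() {
  uint32_t imm = (uint32_t)-42;  // a "negative" i32 immediate
  for (unsigned c = 0; c <= 1; ++c)
    assert(adc(0x1234u, imm, c) == sbc(0x1234u, ~imm, c));
  return 0;
}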
13255
13258 const ARMSubtarget *Subtarget) {
13259 if (!Subtarget->hasMVEIntegerOps())
13260 return SDValue();
13261
13262 SDLoc dl(N);
13263 SDValue SetCC;
13264 SDValue LHS;
13265 SDValue RHS;
13266 ISD::CondCode CC;
13267 SDValue TrueVal;
13268 SDValue FalseVal;
13269
13270 if (N->getOpcode() == ISD::SELECT &&
13271 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13272 SetCC = N->getOperand(0);
13273 LHS = SetCC->getOperand(0);
13274 RHS = SetCC->getOperand(1);
13275 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13276 TrueVal = N->getOperand(1);
13277 FalseVal = N->getOperand(2);
13278 } else if (N->getOpcode() == ISD::SELECT_CC) {
13279 LHS = N->getOperand(0);
13280 RHS = N->getOperand(1);
13281 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13282 TrueVal = N->getOperand(2);
13283 FalseVal = N->getOperand(3);
13284 } else {
13285 return SDValue();
13286 }
13287
13288 unsigned int Opcode = 0;
13289 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13290 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13291 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13292 Opcode = ARMISD::VMINVu;
13293 if (CC == ISD::SETUGT)
13294 std::swap(TrueVal, FalseVal);
13295 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13296 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13297 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13298 Opcode = ARMISD::VMINVs;
13299 if (CC == ISD::SETGT)
13300 std::swap(TrueVal, FalseVal);
13301 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13302 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13303 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13304 Opcode = ARMISD::VMAXVu;
13305 if (CC == ISD::SETULT)
13306 std::swap(TrueVal, FalseVal);
13307 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13308 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13309 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13310 Opcode = ARMISD::VMAXVs;
13311 if (CC == ISD::SETLT)
13312 std::swap(TrueVal, FalseVal);
13313 } else
13314 return SDValue();
13315
13316 // Normalise to the right hand side being the vector reduction
13317 switch (TrueVal->getOpcode()) {
13318 case ISD::VECREDUCE_UMIN:
13319 case ISD::VECREDUCE_SMIN:
13320 case ISD::VECREDUCE_UMAX:
13321 case ISD::VECREDUCE_SMAX:
13322 std::swap(LHS, RHS);
13323 std::swap(TrueVal, FalseVal);
13324 break;
13325 }
13326
13327 EVT VectorType = FalseVal->getOperand(0).getValueType();
13328
13329 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13330 VectorType != MVT::v4i32)
13331 return SDValue();
13332
13333 EVT VectorScalarType = VectorType.getVectorElementType();
13334
13335 // The values being selected must also be the ones being compared
13336 if (TrueVal != LHS || FalseVal != RHS)
13337 return SDValue();
13338
13339 EVT LeftType = LHS->getValueType(0);
13340 EVT RightType = RHS->getValueType(0);
13341
13342 // The types must match the reduced type too
13343 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13344 return SDValue();
13345
13346 // Legalise the scalar to an i32
13347 if (VectorScalarType != MVT::i32)
13348 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13349
13350 // Generate the reduction as an i32 for legalisation purposes
13351 auto Reduction =
13352 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13353
13354 // The result isn't actually an i32 so truncate it back to its original type
13355 if (VectorScalarType != MVT::i32)
13356 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13357
13358 return Reduction;
13359}
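// Illustrative aside, not part of the original source: the scalar shape being
// recognised above. Selecting between a scalar x and the vector minimum, keyed
// on an unsigned-less-than compare of those same two values, is simply
// min(x, min(v[0..n))), i.e. a VMINV-style reduction seeded with x. A portable
// sketch of that equivalence:
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

static uint8_t vminv_model(uint8_t x, const std::vector<uint8_t> &v) {
  uint8_t m = x;
  for (uint8_t e : v)
    m = std::min(m, e);  // fold the scalar into the reduction
  return m;
}

int main() {
  std::vector<uint8_t> v = {9, 3, 7, 250};
  uint8_t red = *std::min_element(v.begin(), v.end());  // vecreduce_umin
  uint8_t x = 5;
  uint8_t selected = (x < red) ? x : red;               // the select/setcc pattern
  assert(selected == vminv_model(x, v));
  return 0;
}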
13360
13361// A special combine for the vqdmulh family of instructions. This is one of the
13362 // potential set of patterns that could match this instruction. The base pattern
13363// you would expect to be min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13364// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13365// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as
13366// the max is unnecessary.
13367static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13368 EVT VT = N->getValueType(0);
13369 SDValue Shft;
13370 ConstantSDNode *Clamp;
13371
13372 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13373 return SDValue();
13374
13375 if (N->getOpcode() == ISD::SMIN) {
13376 Shft = N->getOperand(0);
13377 Clamp = isConstOrConstSplat(N->getOperand(1));
13378 } else if (N->getOpcode() == ISD::VSELECT) {
13379 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13380 SDValue Cmp = N->getOperand(0);
13381 if (Cmp.getOpcode() != ISD::SETCC ||
13382 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13383 Cmp.getOperand(0) != N->getOperand(1) ||
13384 Cmp.getOperand(1) != N->getOperand(2))
13385 return SDValue();
13386 Shft = N->getOperand(1);
13387 Clamp = isConstOrConstSplat(N->getOperand(2));
13388 } else
13389 return SDValue();
13390
13391 if (!Clamp)
13392 return SDValue();
13393
13394 MVT ScalarType;
13395 int ShftAmt = 0;
13396 switch (Clamp->getSExtValue()) {
13397 case (1 << 7) - 1:
13398 ScalarType = MVT::i8;
13399 ShftAmt = 7;
13400 break;
13401 case (1 << 15) - 1:
13402 ScalarType = MVT::i16;
13403 ShftAmt = 15;
13404 break;
13405 case (1ULL << 31) - 1:
13406 ScalarType = MVT::i32;
13407 ShftAmt = 31;
13408 break;
13409 default:
13410 return SDValue();
13411 }
13412
13413 if (Shft.getOpcode() != ISD::SRA)
13414 return SDValue();
13415 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13416 if (!N1 || N1->getSExtValue() != ShftAmt)
13417 return SDValue();
13418
13419 SDValue Mul = Shft.getOperand(0);
13420 if (Mul.getOpcode() != ISD::MUL)
13421 return SDValue();
13422
13423 SDValue Ext0 = Mul.getOperand(0);
13424 SDValue Ext1 = Mul.getOperand(1);
13425 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13426 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13427 return SDValue();
13428 EVT VecVT = Ext0.getOperand(0).getValueType();
13429 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13430 return SDValue();
13431 if (Ext1.getOperand(0).getValueType() != VecVT ||
13432 VecVT.getScalarType() != ScalarType ||
13433 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13434 return SDValue();
13435
13436 SDLoc DL(Mul);
13437 unsigned LegalLanes = 128 / (ShftAmt + 1);
13438 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13439 // For types smaller than legal vectors, extend to be legal and only use the
13440 // needed lanes.
13441 if (VecVT.getSizeInBits() < 128) {
13442 EVT ExtVecVT =
13443 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13444 VecVT.getVectorNumElements());
13445 SDValue Inp0 =
13446 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13447 SDValue Inp1 =
13448 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13449 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13450 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13451 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13452 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13453 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13454 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13455 }
13456
13457 // For larger types, split into legal sized chunks.
13458 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13459 unsigned NumParts = VecVT.getSizeInBits() / 128;
13460 SmallVector<SDValue> Parts;
13461 for (unsigned I = 0; I < NumParts; ++I) {
13462 SDValue Inp0 =
13463 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13464 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13465 SDValue Inp1 =
13466 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13467 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13468 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13469 Parts.push_back(VQDMULH);
13470 }
13471 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13472 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13473}
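// Illustrative aside, not part of the original source: the per-lane i16 shape
// this combine recognises and the vqdmulh-style value that replaces it,
// assuming the usual arithmetic right shift for signed values. The smax side
// of the clamp is unnecessary because only x == y == -32768 can exceed 32767.
#include <algorithm>
#include <cassert>
#include <cstdint>

static int16_t pattern(int16_t x, int16_t y) {
  int32_t p = (int32_t)x * (int32_t)y;                  // mul(sext(x), sext(y))
  return (int16_t)std::min(p >> 15, 32767);             // ashr 15, clamp to SMAX
}

static int16_t vqdmulh_model(int16_t x, int16_t y) {
  int64_t p = 2 * (int64_t)x * (int64_t)y;              // doubling multiply...
  return (int16_t)std::min<int64_t>(p >> 16, 32767);    // ...returning the high half
}

int main() {
  assert(pattern(-32768, -32768) == 32767);             // the lone saturating case
  assert(pattern(12000, -7000) == vqdmulh_model(12000, -7000));
  return 0;
}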
13474
13477 const ARMSubtarget *Subtarget) {
13478 if (!Subtarget->hasMVEIntegerOps())
13479 return SDValue();
13480
13481 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13482 return V;
13483
13484 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13485 //
13486 // We need to re-implement this optimization here as the implementation in the
13487 // Target-Independent DAGCombiner does not handle the kind of constant we make
13488 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13489 // good reason, allowing truncation there would break other targets).
13490 //
13491 // Currently, this is only done for MVE, as it's the only target that benefits
13492 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13493 if (N->getOperand(0).getOpcode() != ISD::XOR)
13494 return SDValue();
13495 SDValue XOR = N->getOperand(0);
13496
13497 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13498 // It is important to check with truncation allowed as the BUILD_VECTORs we
13499 // generate in those situations will truncate their operands.
13500 ConstantSDNode *Const =
13501 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13502 /*AllowTruncation*/ true);
13503 if (!Const || !Const->isOne())
13504 return SDValue();
13505
13506 // Rewrite into vselect(cond, rhs, lhs).
13507 SDValue Cond = XOR->getOperand(0);
13508 SDValue LHS = N->getOperand(1);
13509 SDValue RHS = N->getOperand(2);
13510 EVT Type = N->getValueType(0);
13511 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13512}
13513
13514// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13517 const ARMSubtarget *Subtarget) {
13518 SDValue Op0 = N->getOperand(0);
13519 SDValue Op1 = N->getOperand(1);
13520 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13521 EVT VT = N->getValueType(0);
13522
13523 if (!Subtarget->hasMVEIntegerOps() ||
13525 return SDValue();
13526
13527 if (CC == ISD::SETUGE) {
13528 std::swap(Op0, Op1);
13529 CC = ISD::SETULT;
13530 }
13531
13532 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13534 return SDValue();
13535
13536 // Check first operand is BuildVector of 0,1,2,...
13537 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13538 if (!Op0.getOperand(I).isUndef() &&
13539 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13540 Op0.getConstantOperandVal(I) == I))
13541 return SDValue();
13542 }
13543
13544 // The second is a Splat of Op1S
13545 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13546 if (!Op1S)
13547 return SDValue();
13548
13549 unsigned Opc;
13550 switch (VT.getVectorNumElements()) {
13551 case 2:
13552 Opc = Intrinsic::arm_mve_vctp64;
13553 break;
13554 case 4:
13555 Opc = Intrinsic::arm_mve_vctp32;
13556 break;
13557 case 8:
13558 Opc = Intrinsic::arm_mve_vctp16;
13559 break;
13560 case 16:
13561 Opc = Intrinsic::arm_mve_vctp8;
13562 break;
13563 default:
13564 return SDValue();
13565 }
13566
13567 SDLoc DL(N);
13568 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13569 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13570 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13571}
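// Illustrative aside, not part of the original source: the predicate the
// vsetcc above computes. Comparing the lane indices [0,1,2,...] against a
// splat of n with "unsigned less than" produces a mask whose first
// min(n, NumLanes) lanes are set, which is the VCTP behaviour. A four-lane
// sketch:
#include <array>
#include <cassert>
#include <cstdint>

static std::array<bool, 4> vctp4_model(uint32_t n) {
  std::array<bool, 4> mask{};
  for (uint32_t i = 0; i < 4; ++i)
    mask[i] = i < n;  // vsetcc([0,1,2,3], splat(n), ult)
  return mask;
}

int main() {
  assert((vctp4_model(2) == std::array<bool, 4>{true, true, false, false}));
  assert((vctp4_model(7) == std::array<bool, 4>{true, true, true, true}));
  return 0;
}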
13572
13573/// PerformADDECombine - Target-specific dag combine transform from
13574/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13575/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13576static SDValue PerformADDECombine(SDNode *N,
13577 TargetLowering::DAGCombinerInfo &DCI,
13578 const ARMSubtarget *Subtarget) {
13579 // Only ARM and Thumb2 support UMLAL/SMLAL.
13580 if (Subtarget->isThumb1Only())
13581 return PerformAddeSubeCombine(N, DCI, Subtarget);
13582
13583 // Only perform the checks after legalize when the pattern is available.
13584 if (DCI.isBeforeLegalize()) return SDValue();
13585
13586 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13587}
13588
13589/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13590/// operands N0 and N1. This is a helper for PerformADDCombine that is
13591/// called with the default operands, and if that fails, with commuted
13592/// operands.
13593static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13594 TargetLowering::DAGCombinerInfo &DCI,
13595 const ARMSubtarget *Subtarget){
13596 // Attempt to create vpadd for this add.
13597 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13598 return Result;
13599
13600 // Attempt to create vpaddl for this add.
13601 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13602 return Result;
13603 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13604 Subtarget))
13605 return Result;
13606
13607 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13608 if (N0.getNode()->hasOneUse())
13609 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13610 return Result;
13611 return SDValue();
13612}
13613
13615 EVT VT = N->getValueType(0);
13616 SDValue N0 = N->getOperand(0);
13617 SDValue N1 = N->getOperand(1);
13618 SDLoc dl(N);
13619
13620 auto IsVecReduce = [](SDValue Op) {
13621 switch (Op.getOpcode()) {
13622 case ISD::VECREDUCE_ADD:
13623 case ARMISD::VADDVs:
13624 case ARMISD::VADDVu:
13625 case ARMISD::VMLAVs:
13626 case ARMISD::VMLAVu:
13627 return true;
13628 }
13629 return false;
13630 };
13631
13632 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13633 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13634 // add(add(X, vecreduce(Y)), vecreduce(Z))
13635 // to make better use of vaddva style instructions.
13636 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13637 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13638 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13639 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13640 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13641 }
13642 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13643 // add(add(add(A, C), reduce(B)), reduce(D))
13644 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13645 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13646 unsigned N0RedOp = 0;
13647 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13648 N0RedOp = 1;
13649 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13650 return SDValue();
13651 }
13652
13653 unsigned N1RedOp = 0;
13654 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13655 N1RedOp = 1;
13656 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13657 return SDValue();
13658
13659 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13660 N1.getOperand(1 - N1RedOp));
13661 SDValue Add1 =
13662 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13663 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13664 }
13665 return SDValue();
13666 };
13667 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13668 return R;
13669 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13670 return R;
13671
13672 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13673 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13674 // by ascending load offsets. This can help cores prefetch if the order of
13675 // loads is more predictable.
13676 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13677 // Check if two reductions are known to load data where one is before/after
13678 // another. Return negative if N0 loads data before N1, positive if N1 is
13679 // before N0 and 0 otherwise if nothing is known.
13680 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13681 // Look through to the first operand of a MUL, for the VMLA case.
13682 // Currently only looks at the first operand, in the hope they are equal.
13683 if (N0.getOpcode() == ISD::MUL)
13684 N0 = N0.getOperand(0);
13685 if (N1.getOpcode() == ISD::MUL)
13686 N1 = N1.getOperand(0);
13687
13688 // Return true if the two operands are loads to the same object and the
13689 // offset of the first is known to be less than the offset of the second.
13690 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13691 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13692 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13693 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13694 Load1->isIndexed())
13695 return 0;
13696
13697 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13698 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13699
13700 if (!BaseLocDecomp0.getBase() ||
13701 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13702 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13703 return 0;
13704 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13705 return -1;
13706 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13707 return 1;
13708 return 0;
13709 };
13710
13711 SDValue X;
13712 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13713 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13714 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13715 N0.getOperand(1).getOperand(0));
13716 if (IsBefore < 0) {
13717 X = N0.getOperand(0);
13718 N0 = N0.getOperand(1);
13719 } else if (IsBefore > 0) {
13720 X = N0.getOperand(1);
13721 N0 = N0.getOperand(0);
13722 } else
13723 return SDValue();
13724 } else if (IsVecReduce(N0.getOperand(0))) {
13725 X = N0.getOperand(1);
13726 N0 = N0.getOperand(0);
13727 } else if (IsVecReduce(N0.getOperand(1))) {
13728 X = N0.getOperand(0);
13729 N0 = N0.getOperand(1);
13730 } else
13731 return SDValue();
13732 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13733 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13734 // Note this is backward to how you would expect. We create
13735 // add(reduce(load + 16), reduce(load + 0)) so that the
13736 // add(reduce(load+16), X) is combined into VADDVA(X, load+16)), leaving
13737 // the X as VADDV(load + 0)
13738 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13739 } else
13740 return SDValue();
13741
13742 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13743 return SDValue();
13744
13745 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13746 return SDValue();
13747
13748 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13749 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13750 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13751 };
13752 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13753 return R;
13754 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13755 return R;
13756 return SDValue();
13757}
13758
13759static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13760 const ARMSubtarget *Subtarget) {
13761 if (!Subtarget->hasMVEIntegerOps())
13762 return SDValue();
13763
13765 return R;
13766
13767 EVT VT = N->getValueType(0);
13768 SDValue N0 = N->getOperand(0);
13769 SDValue N1 = N->getOperand(1);
13770 SDLoc dl(N);
13771
13772 if (VT != MVT::i64)
13773 return SDValue();
13774
13775 // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
13776 // will look like:
13777 // t1: i32,i32 = ARMISD::VADDLVs x
13778 // t2: i64 = build_pair t1, t1:1
13779 // t3: i64 = add t2, y
13780 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13781 // the add to be simplified separately.
13782 // We also need to check for sext / zext and commutative adds.
13783 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13784 SDValue NB) {
13785 if (NB->getOpcode() != ISD::BUILD_PAIR)
13786 return SDValue();
13787 SDValue VecRed = NB->getOperand(0);
13788 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13789 VecRed.getResNo() != 0 ||
13790 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13791 return SDValue();
13792
13793 if (VecRed->getOpcode() == OpcodeA) {
13794 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13795 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13796 VecRed.getOperand(0), VecRed.getOperand(1));
13797 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13798 }
13799
13800 SmallVector<SDValue, 4> Ops(2);
13801 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13802
13803 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13804 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13805 Ops.push_back(VecRed->getOperand(I));
13806 SDValue Red =
13807 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13808 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13809 SDValue(Red.getNode(), 1));
13810 };
13811
13812 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13813 return M;
13814 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13815 return M;
13816 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13817 return M;
13818 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13819 return M;
13820 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13821 return M;
13822 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13823 return M;
13824 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13825 return M;
13826 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13827 return M;
13828 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13829 return M;
13830 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13831 return M;
13832 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13833 return M;
13834 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13835 return M;
13836 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13837 return M;
13838 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13839 return M;
13840 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13841 return M;
13842 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13843 return M;
13844 return SDValue();
13845}
13846
13847bool
13849 CombineLevel Level) const {
13850 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13851 N->getOpcode() == ISD::SRL) &&
13852 "Expected shift op");
13853
13854 SDValue ShiftLHS = N->getOperand(0);
13855 if (!ShiftLHS->hasOneUse())
13856 return false;
13857
13858 if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
13859 !ShiftLHS.getOperand(0)->hasOneUse())
13860 return false;
13861
13862 if (Level == BeforeLegalizeTypes)
13863 return true;
13864
13865 if (N->getOpcode() != ISD::SHL)
13866 return true;
13867
13868 if (Subtarget->isThumb1Only()) {
13869 // Avoid making expensive immediates by commuting shifts. (This logic
13870 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13871 // for free.)
13872 if (N->getOpcode() != ISD::SHL)
13873 return true;
13874 SDValue N1 = N->getOperand(0);
13875 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13876 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13877 return true;
13878 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13879 if (Const->getAPIntValue().ult(256))
13880 return false;
13881 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13882 Const->getAPIntValue().sgt(-256))
13883 return false;
13884 }
13885 return true;
13886 }
13887
13888 // Turn off commute-with-shift transform after legalization, so it doesn't
13889 // conflict with PerformSHLSimplify. (We could try to detect when
13890 // PerformSHLSimplify would trigger more precisely, but it isn't
13891 // really necessary.)
13892 return false;
13893}
13894
13896 const SDNode *N) const {
13897 assert(N->getOpcode() == ISD::XOR &&
13898 (N->getOperand(0).getOpcode() == ISD::SHL ||
13899 N->getOperand(0).getOpcode() == ISD::SRL) &&
13900 "Expected XOR(SHIFT) pattern");
13901
13902 // Only commute if the entire NOT mask is a hidden shifted mask.
13903 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13904 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13905 if (XorC && ShiftC) {
13906 unsigned MaskIdx, MaskLen;
13907 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13908 unsigned ShiftAmt = ShiftC->getZExtValue();
13909 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13910 if (N->getOperand(0).getOpcode() == ISD::SHL)
13911 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13912 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13913 }
13914 }
13915
13916 return false;
13917}
13918
13920 const SDNode *N, CombineLevel Level) const {
13921 assert(((N->getOpcode() == ISD::SHL &&
13922 N->getOperand(0).getOpcode() == ISD::SRL) ||
13923 (N->getOpcode() == ISD::SRL &&
13924 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13925 "Expected shift-shift mask");
13926
13927 if (!Subtarget->isThumb1Only())
13928 return true;
13929
13930 if (Level == BeforeLegalizeTypes)
13931 return true;
13932
13933 return false;
13934}
13935
13937 EVT VT) const {
13938 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13939}
13940
13942 if (!Subtarget->hasNEON()) {
13943 if (Subtarget->isThumb1Only())
13944 return VT.getScalarSizeInBits() <= 32;
13945 return true;
13946 }
13947 return VT.isScalarInteger();
13948}
13949
13951 EVT VT) const {
13952 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13953 return false;
13954
13955 switch (FPVT.getSimpleVT().SimpleTy) {
13956 case MVT::f16:
13957 return Subtarget->hasVFP2Base();
13958 case MVT::f32:
13959 return Subtarget->hasVFP2Base();
13960 case MVT::f64:
13961 return Subtarget->hasFP64();
13962 case MVT::v4f32:
13963 case MVT::v8f16:
13964 return Subtarget->hasMVEFloatOps();
13965 default:
13966 return false;
13967 }
13968}
13969
13970static SDValue PerformSHLSimplify(SDNode *N,
13971 TargetLowering::DAGCombinerInfo &DCI,
13972 const ARMSubtarget *ST) {
13973 // Allow the generic combiner to identify potential bswaps.
13974 if (DCI.isBeforeLegalize())
13975 return SDValue();
13976
13977 // DAG combiner will fold:
13978 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13979 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13980 // Other code patterns that can be also be modified have the following form:
13981 // b + ((a << 1) | 510)
13982 // b + ((a << 1) & 510)
13983 // b + ((a << 1) ^ 510)
13984 // b + ((a << 1) + 510)
13985
13986 // Many instructions can perform the shift for free, but it requires both
13987 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13988 // instruction will be needed. So, unfold back to the original pattern if:
13989 // - c1 and c2 are small enough that they don't require mov imms.
13990 // - the user(s) of the node can perform an shl.
13991
13992 // No shifted operands for 16-bit instructions.
13993 if (ST->isThumb() && ST->isThumb1Only())
13994 return SDValue();
13995
13996 // Check that all the users could perform the shl themselves.
13997 for (auto *U : N->users()) {
13998 switch(U->getOpcode()) {
13999 default:
14000 return SDValue();
14001 case ISD::SUB:
14002 case ISD::ADD:
14003 case ISD::AND:
14004 case ISD::OR:
14005 case ISD::XOR:
14006 case ISD::SETCC:
14007 case ARMISD::CMP:
14008 // Check that the user isn't already using a constant because there
14009 // aren't any instructions that support an immediate operand and a
14010 // shifted operand.
14011 if (isa<ConstantSDNode>(U->getOperand(0)) ||
14012 isa<ConstantSDNode>(U->getOperand(1)))
14013 return SDValue();
14014
14015 // Check that it's not already using a shift.
14016 if (U->getOperand(0).getOpcode() == ISD::SHL ||
14017 U->getOperand(1).getOpcode() == ISD::SHL)
14018 return SDValue();
14019 break;
14020 }
14021 }
14022
14023 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
14024 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
14025 return SDValue();
14026
14027 if (N->getOperand(0).getOpcode() != ISD::SHL)
14028 return SDValue();
14029
14030 SDValue SHL = N->getOperand(0);
14031
14032 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
14033 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
14034 if (!C1ShlC2 || !C2)
14035 return SDValue();
14036
14037 APInt C2Int = C2->getAPIntValue();
14038 APInt C1Int = C1ShlC2->getAPIntValue();
14039 unsigned C2Width = C2Int.getBitWidth();
14040 if (C2Int.uge(C2Width))
14041 return SDValue();
14042 uint64_t C2Value = C2Int.getZExtValue();
14043
14044 // Check that performing a lshr will not lose any information.
14045 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
14046 if ((C1Int & Mask) != C1Int)
14047 return SDValue();
14048
14049 // Shift the first constant.
14050 C1Int.lshrInPlace(C2Int);
14051
14052 // The immediates are encoded as an 8-bit value that can be rotated.
14053 auto LargeImm = [](const APInt &Imm) {
14054 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
14055 return Imm.getBitWidth() - Zeros > 8;
14056 };
14057
14058 if (LargeImm(C1Int) || LargeImm(C2Int))
14059 return SDValue();
14060
14061 SelectionDAG &DAG = DCI.DAG;
14062 SDLoc dl(N);
14063 SDValue X = SHL.getOperand(0);
14064 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14065 DAG.getConstant(C1Int, dl, MVT::i32));
14066 // Shift left to compensate for the lshr of C1Int.
14067 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
14068
14069 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14070 SHL.dump(); N->dump());
14071 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14072 return Res;
14073}
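// Illustrative aside, not part of the original source: the identity behind the
// unfolding above. Whenever the low c2 bits of c1 are zero,
//   ((x << c2) op c1) == ((x op (c1 >> c2)) << c2)   for op in {+, |, ^, &}
// on i32, so the shift can be folded back into a free shifted operand instead
// of materialising the large immediate c1. A quick check for the add case:
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t c2 = 1, c1 = 510;                 // c1's low c2 bits are zero
  for (uint32_t x : {0u, 7u, 0x12345678u}) {
    uint32_t folded = (x << c2) + c1;              // what DAGCombine produced
    uint32_t unfolded = (x + (c1 >> c2)) << c2;    // what this combine restores
    assert(folded == unfolded);
  }
  return 0;
}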
14074
14075
14076/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14077///
14078static SDValue PerformADDCombine(SDNode *N,
14079 TargetLowering::DAGCombinerInfo &DCI,
14080 const ARMSubtarget *Subtarget) {
14081 SDValue N0 = N->getOperand(0);
14082 SDValue N1 = N->getOperand(1);
14083
14084 // Only works one way, because it needs an immediate operand.
14085 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14086 return Result;
14087
14088 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14089 return Result;
14090
14091 // First try with the default operand order.
14092 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14093 return Result;
14094
14095 // If that didn't work, try again with the operands commuted.
14096 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14097}
14098
14099// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14100// providing -X is as cheap as X (currently, just a constant).
14101static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14102 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14103 return SDValue();
14104 SDValue CSINC = N->getOperand(1);
14105 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14106 return SDValue();
14107
14108 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14109 if (!X)
14110 return SDValue();
14111
14112 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14113 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14114 CSINC.getOperand(0)),
14115 CSINC.getOperand(1), CSINC.getOperand(2),
14116 CSINC.getOperand(3));
14117}
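// Illustrative aside, not part of the original source: the two's complement
// identity behind this fold. A csinc yields Y+1 on its false path and a csinv
// yields ~Y, and 0 - (Y + 1) == ~Y for every 32-bit Y, so negating a csinc of
// a cheap-to-negate X is the same as a csinv of -X:
#include <cassert>
#include <cstdint>

static uint32_t csinc_model(bool cc, uint32_t x, uint32_t y) { return cc ? x : y + 1; }
static uint32_t csinv_model(bool cc, uint32_t x, uint32_t y) { return cc ? x : ~y; }

int main() {
  const uint32_t x = 5;  // a constant, so -x is as cheap as x
  for (bool cc : {false, true})
    for (uint32_t y : {0u, 1u, 0xFFFFFFFFu})
      assert(0u - csinc_model(cc, x, y) == csinv_model(cc, 0u - x, y));
  return 0;
}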
14118
14119/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14120///
14121static SDValue PerformSUBCombine(SDNode *N,
14122 TargetLowering::DAGCombinerInfo &DCI,
14123 const ARMSubtarget *Subtarget) {
14124 SDValue N0 = N->getOperand(0);
14125 SDValue N1 = N->getOperand(1);
14126
14127 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14128 if (N1.getNode()->hasOneUse())
14129 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14130 return Result;
14131
14132 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14133 return R;
14134
14135 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14136 return SDValue();
14137
14138 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14139 // so that we can readily pattern match more mve instructions which can use
14140 // a scalar operand.
14141 SDValue VDup = N->getOperand(1);
14142 if (VDup->getOpcode() != ARMISD::VDUP)
14143 return SDValue();
14144
14145 SDValue VMov = N->getOperand(0);
14146 if (VMov->getOpcode() == ISD::BITCAST)
14147 VMov = VMov->getOperand(0);
14148
14149 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14150 return SDValue();
14151
14152 SDLoc dl(N);
14153 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14154 DCI.DAG.getConstant(0, dl, MVT::i32),
14155 VDup->getOperand(0));
14156 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14157}
14158
14159/// PerformVMULCombine
14160/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14161/// special multiplier accumulator forwarding.
14162/// vmul d3, d0, d2
14163/// vmla d3, d1, d2
14164/// is faster than
14165/// vadd d3, d0, d1
14166/// vmul d3, d3, d2
14167// However, for (A + B) * (A + B),
14168// vadd d2, d0, d1
14169// vmul d3, d0, d2
14170// vmla d3, d1, d2
14171// is slower than
14172// vadd d2, d0, d1
14173// vmul d3, d2, d2
14174static SDValue PerformVMULCombine(SDNode *N,
14175 TargetLowering::DAGCombinerInfo &DCI,
14176 const ARMSubtarget *Subtarget) {
14177 if (!Subtarget->hasVMLxForwarding())
14178 return SDValue();
14179
14180 SelectionDAG &DAG = DCI.DAG;
14181 SDValue N0 = N->getOperand(0);
14182 SDValue N1 = N->getOperand(1);
14183 unsigned Opcode = N0.getOpcode();
14184 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14185 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14186 Opcode = N1.getOpcode();
14187 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14188 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14189 return SDValue();
14190 std::swap(N0, N1);
14191 }
14192
14193 if (N0 == N1)
14194 return SDValue();
14195
14196 EVT VT = N->getValueType(0);
14197 SDLoc DL(N);
14198 SDValue N00 = N0->getOperand(0);
14199 SDValue N01 = N0->getOperand(1);
14200 return DAG.getNode(Opcode, DL, VT,
14201 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14202 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14203}
14204
14205static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14206 const ARMSubtarget *Subtarget) {
14207 EVT VT = N->getValueType(0);
14208 if (VT != MVT::v2i64)
14209 return SDValue();
14210
14211 SDValue N0 = N->getOperand(0);
14212 SDValue N1 = N->getOperand(1);
14213
14214 auto IsSignExt = [&](SDValue Op) {
14215 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14216 return SDValue();
14217 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14218 if (VT.getScalarSizeInBits() == 32)
14219 return Op->getOperand(0);
14220 return SDValue();
14221 };
14222 auto IsZeroExt = [&](SDValue Op) {
14223 // Zero extends are a little more awkward. At the point we are matching
14224 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14225 // That might be before or after a bitcast depending on how the and is
14226 // placed. Because this has to look through bitcasts, it is currently only
14227 // supported on LE.
14228 if (!Subtarget->isLittle())
14229 return SDValue();
14230
14231 SDValue And = Op;
14232 if (And->getOpcode() == ISD::BITCAST)
14233 And = And->getOperand(0);
14234 if (And->getOpcode() != ISD::AND)
14235 return SDValue();
14236 SDValue Mask = And->getOperand(1);
14237 if (Mask->getOpcode() == ISD::BITCAST)
14238 Mask = Mask->getOperand(0);
14239
14240 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14241 Mask.getValueType() != MVT::v4i32)
14242 return SDValue();
14243 if (isAllOnesConstant(Mask->getOperand(0)) &&
14244 isNullConstant(Mask->getOperand(1)) &&
14245 isAllOnesConstant(Mask->getOperand(2)) &&
14246 isNullConstant(Mask->getOperand(3)))
14247 return And->getOperand(0);
14248 return SDValue();
14249 };
14250
14251 SDLoc dl(N);
14252 if (SDValue Op0 = IsSignExt(N0)) {
14253 if (SDValue Op1 = IsSignExt(N1)) {
14254 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14255 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14256 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14257 }
14258 }
14259 if (SDValue Op0 = IsZeroExt(N0)) {
14260 if (SDValue Op1 = IsZeroExt(N1)) {
14261 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14262 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14263 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14264 }
14265 }
14266
14267 return SDValue();
14268}
14269
14272 const ARMSubtarget *Subtarget) {
14273 SelectionDAG &DAG = DCI.DAG;
14274
14275 EVT VT = N->getValueType(0);
14276 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14277 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14278
14279 if (Subtarget->isThumb1Only())
14280 return SDValue();
14281
14282 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14283 return SDValue();
14284
14285 if (VT.is64BitVector() || VT.is128BitVector())
14286 return PerformVMULCombine(N, DCI, Subtarget);
14287 if (VT != MVT::i32)
14288 return SDValue();
14289
14290 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14291 if (!C)
14292 return SDValue();
14293
14294 int64_t MulAmt = C->getSExtValue();
14295 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14296
14297 ShiftAmt = ShiftAmt & (32 - 1);
14298 SDValue V = N->getOperand(0);
14299 SDLoc DL(N);
14300
14301 SDValue Res;
14302 MulAmt >>= ShiftAmt;
14303
14304 if (MulAmt >= 0) {
14305 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14306 // (mul x, 2^N + 1) => (add (shl x, N), x)
14307 Res = DAG.getNode(ISD::ADD, DL, VT,
14308 V,
14309 DAG.getNode(ISD::SHL, DL, VT,
14310 V,
14311 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14312 MVT::i32)));
14313 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14314 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14315 Res = DAG.getNode(ISD::SUB, DL, VT,
14316 DAG.getNode(ISD::SHL, DL, VT,
14317 V,
14318 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14319 MVT::i32)),
14320 V);
14321 } else
14322 return SDValue();
14323 } else {
14324 uint64_t MulAmtAbs = -MulAmt;
14325 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14326 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14327 Res = DAG.getNode(ISD::SUB, DL, VT,
14328 V,
14329 DAG.getNode(ISD::SHL, DL, VT,
14330 V,
14331 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14332 MVT::i32)));
14333 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14334 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14335 Res = DAG.getNode(ISD::ADD, DL, VT,
14336 V,
14337 DAG.getNode(ISD::SHL, DL, VT,
14338 V,
14339 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14340 MVT::i32)));
14341 Res = DAG.getNode(ISD::SUB, DL, VT,
14342 DAG.getConstant(0, DL, MVT::i32), Res);
14343 } else
14344 return SDValue();
14345 }
14346
14347 if (ShiftAmt != 0)
14348 Res = DAG.getNode(ISD::SHL, DL, VT,
14349 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14350
14351 // Do not add new nodes to DAG combiner worklist.
14352 DCI.CombineTo(N, Res, false);
14353 return SDValue();
14354}
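// Illustrative aside, not part of the original source: the strength reduction
// used above, stated as plain i32 identities (all arithmetic modulo 2^32):
//   x * (2^N + 1) == (x << N) + x
//   x * (2^N - 1) == (x << N) - x
// with the negated variants only flipping the final sign. A quick check:
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {1u, 17u, 0xDEADBEEFu}) {
    assert(x * 9u == (x << 3) + x);                //  9 == 2^3 + 1
    assert(x * 31u == (x << 5) - x);               // 31 == 2^5 - 1
    assert(x * (uint32_t)-7 == x - (x << 3));      // -7 == -(2^3 - 1)
  }
  return 0;
}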
14355
14356static SDValue CombineANDShift(SDNode *N,
14357 TargetLowering::DAGCombinerInfo &DCI,
14358 const ARMSubtarget *Subtarget) {
14359 // Allow DAGCombine to pattern-match before we touch the canonical form.
14360 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14361 return SDValue();
14362
14363 if (N->getValueType(0) != MVT::i32)
14364 return SDValue();
14365
14366 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14367 if (!N1C)
14368 return SDValue();
14369
14370 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14371 // Don't transform uxtb/uxth.
14372 if (C1 == 255 || C1 == 65535)
14373 return SDValue();
14374
14375 SDNode *N0 = N->getOperand(0).getNode();
14376 if (!N0->hasOneUse())
14377 return SDValue();
14378
14379 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14380 return SDValue();
14381
14382 bool LeftShift = N0->getOpcode() == ISD::SHL;
14383
14384 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14385 if (!N01C)
14386 return SDValue();
14387
14388 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14389 if (!C2 || C2 >= 32)
14390 return SDValue();
14391
14392 // Clear irrelevant bits in the mask.
14393 if (LeftShift)
14394 C1 &= (-1U << C2);
14395 else
14396 C1 &= (-1U >> C2);
14397
14398 SelectionDAG &DAG = DCI.DAG;
14399 SDLoc DL(N);
14400
14401 // We have a pattern of the form "(and (shl x, c2) c1)" or
14402 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14403 // transform to a pair of shifts, to save materializing c1.
14404
14405 // First pattern: right shift, then mask off leading bits.
14406 // FIXME: Use demanded bits?
14407 if (!LeftShift && isMask_32(C1)) {
14408 uint32_t C3 = llvm::countl_zero(C1);
14409 if (C2 < C3) {
14410 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14411 DAG.getConstant(C3 - C2, DL, MVT::i32));
14412 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14413 DAG.getConstant(C3, DL, MVT::i32));
14414 }
14415 }
14416
14417 // First pattern, reversed: left shift, then mask off trailing bits.
14418 if (LeftShift && isMask_32(~C1)) {
14419 uint32_t C3 = llvm::countr_zero(C1);
14420 if (C2 < C3) {
14421 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14422 DAG.getConstant(C3 - C2, DL, MVT::i32));
14423 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14424 DAG.getConstant(C3, DL, MVT::i32));
14425 }
14426 }
14427
14428 // Second pattern: left shift, then mask off leading bits.
14429 // FIXME: Use demanded bits?
14430 if (LeftShift && isShiftedMask_32(C1)) {
14431 uint32_t Trailing = llvm::countr_zero(C1);
14432 uint32_t C3 = llvm::countl_zero(C1);
14433 if (Trailing == C2 && C2 + C3 < 32) {
14434 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14435 DAG.getConstant(C2 + C3, DL, MVT::i32));
14436 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14437 DAG.getConstant(C3, DL, MVT::i32));
14438 }
14439 }
14440
14441 // Second pattern, reversed: right shift, then mask off trailing bits.
14442 // FIXME: Handle other patterns of known/demanded bits.
14443 if (!LeftShift && isShiftedMask_32(C1)) {
14444 uint32_t Leading = llvm::countl_zero(C1);
14445 uint32_t C3 = llvm::countr_zero(C1);
14446 if (Leading == C2 && C2 + C3 < 32) {
14447 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14448 DAG.getConstant(C2 + C3, DL, MVT::i32));
14449 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14450 DAG.getConstant(C3, DL, MVT::i32));
14451 }
14452 }
14453
14454 // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
14455 // if "c1 >> c2" is a cheaper immediate than "c1"
14456 if (LeftShift &&
14457 HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {
14458
14459 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14460 DAG.getConstant(C1 >> C2, DL, MVT::i32));
14461 return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
14462 DAG.getConstant(C2, DL, MVT::i32));
14463 }
14464
14465 return SDValue();
14466}
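// Illustrative aside, not part of the original source: the first pattern above
// written out in plain C++. For a low mask C1 with C3 leading zero bits and a
// right shift by C2 < C3, the AND can be replaced by a shift pair,
//   (x >> C2) & C1 == (x << (C3 - C2)) >> C3
// which avoids materialising C1 as an expensive Thumb1 immediate:
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 0x3FF;  // low 10-bit mask, so C3 == 22 leading zeros
  const uint32_t C2 = 4, C3 = 22;
  for (uint32_t x : {0u, 0xFFFFFFFFu, 0xCAFEBABEu}) {
    uint32_t masked = (x >> C2) & C1;
    uint32_t shifted = (x << (C3 - C2)) >> C3;
    assert(masked == shifted);
  }
  return 0;
}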
14467
14470 const ARMSubtarget *Subtarget) {
14471 // Attempt to use immediate-form VBIC
14472 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14473 SDLoc dl(N);
14474 EVT VT = N->getValueType(0);
14475 SelectionDAG &DAG = DCI.DAG;
14476
14477 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14478 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14479 return SDValue();
14480
14481 APInt SplatBits, SplatUndef;
14482 unsigned SplatBitSize;
14483 bool HasAnyUndefs;
14484 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14485 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14486 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14487 SplatBitSize == 64) {
14488 EVT VbicVT;
14489 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14490 SplatUndef.getZExtValue(), SplatBitSize,
14491 DAG, dl, VbicVT, VT, OtherModImm);
14492 if (Val.getNode()) {
14493 SDValue Input =
14494 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
14495 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14496 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
14497 }
14498 }
14499 }
14500
14501 if (!Subtarget->isThumb1Only()) {
14502 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14503 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14504 return Result;
14505
14506 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14507 return Result;
14508 }
14509
14510 if (Subtarget->isThumb1Only())
14511 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14512 return Result;
14513
14514 return SDValue();
14515}
14516
14517// Try combining OR nodes to SMULWB, SMULWT.
14520 const ARMSubtarget *Subtarget) {
14521 if (!Subtarget->hasV6Ops() ||
14522 (Subtarget->isThumb() &&
14523 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14524 return SDValue();
14525
14526 SDValue SRL = OR->getOperand(0);
14527 SDValue SHL = OR->getOperand(1);
14528
14529 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14530 SRL = OR->getOperand(1);
14531 SHL = OR->getOperand(0);
14532 }
14533 if (!isSRL16(SRL) || !isSHL16(SHL))
14534 return SDValue();
14535
14536 // The first operands to the shifts need to be the two results from the
14537 // same smul_lohi node.
14538 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14539 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14540 return SDValue();
14541
14542 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14543 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14544 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14545 return SDValue();
14546
14547 // Now we have:
14548 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14549 // For SMULW[B|T], smul_lohi will take a 32-bit and a 16-bit argument.
14550 // For SMULWB the 16-bit value will be sign extended somehow.
14551 // For SMULWT only the SRA is required.
14552 // Check both sides of SMUL_LOHI
14553 SDValue OpS16 = SMULLOHI->getOperand(0);
14554 SDValue OpS32 = SMULLOHI->getOperand(1);
14555
14556 SelectionDAG &DAG = DCI.DAG;
14557 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14558 OpS16 = OpS32;
14559 OpS32 = SMULLOHI->getOperand(0);
14560 }
14561
14562 SDLoc dl(OR);
14563 unsigned Opcode = 0;
14564 if (isS16(OpS16, DAG))
14565 Opcode = ARMISD::SMULWB;
14566 else if (isSRA16(OpS16)) {
14567 Opcode = ARMISD::SMULWT;
14568 OpS16 = OpS16->getOperand(0);
14569 }
14570 else
14571 return SDValue();
14572
14573 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14574 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14575 return SDValue(OR, 0);
14576}
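// Illustrative aside, not part of the original source, assuming the usual
// arithmetic right shift for signed values: the or-of-shifts matched above
// reconstructs bits [16..47] of the 64-bit product, which is exactly what a
// word-by-halfword multiply (SMULWB-style) computes when one operand is a
// sign-extended 16-bit value:
#include <cassert>
#include <cstdint>

static uint32_t or_of_shifts(int32_t a, int32_t b) {
  uint64_t p = (uint64_t)((int64_t)a * (int64_t)b);   // smul_lohi
  uint32_t lo = (uint32_t)p, hi = (uint32_t)(p >> 32);
  return (lo >> 16) | (hi << 16);                     // (or (srl lo, 16), (shl hi, 16))
}

static uint32_t smulwb_model(int32_t a, int16_t b) {
  return (uint32_t)(((int64_t)a * b) >> 16);          // 32x16 product, shifted right by 16
}

int main() {
  assert(or_of_shifts(123456789, -321) == smulwb_model(123456789, -321));
  assert(or_of_shifts(-7, 1000) == smulwb_model(-7, 1000));
  return 0;
}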
14577
14578static SDValue PerformORCombineToBFI(SDNode *N,
14579 TargetLowering::DAGCombinerInfo &DCI,
14580 const ARMSubtarget *Subtarget) {
14581 // BFI is only available on V6T2+
14582 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14583 return SDValue();
14584
14585 EVT VT = N->getValueType(0);
14586 SDValue N0 = N->getOperand(0);
14587 SDValue N1 = N->getOperand(1);
14588 SelectionDAG &DAG = DCI.DAG;
14589 SDLoc DL(N);
14590 // 1) or (and A, mask), val => ARMbfi A, val, mask
14592 // iff (val & ~mask) == val
14592 //
14593 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14594 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14595 // && mask == ~mask2
14596 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14597 // && ~mask == mask2
14598 // (i.e., copy a bitfield value into another bitfield of the same width)
14599
14600 if (VT != MVT::i32)
14601 return SDValue();
14602
14603 SDValue N00 = N0.getOperand(0);
14604
14605 // The value and the mask need to be constants so we can verify this is
14606 // actually a bitfield set. If the mask is 0xffff, we can do better
14607 // via a movt instruction, so don't use BFI in that case.
14608 SDValue MaskOp = N0.getOperand(1);
14609 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14610 if (!MaskC)
14611 return SDValue();
14612 unsigned Mask = MaskC->getZExtValue();
14613 if (Mask == 0xffff)
14614 return SDValue();
14615 SDValue Res;
14616 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14617 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14618 if (N1C) {
14619 unsigned Val = N1C->getZExtValue();
14620 if ((Val & ~Mask) != Val)
14621 return SDValue();
14622
14623 if (ARM::isBitFieldInvertedMask(Mask)) {
14624 Val >>= llvm::countr_zero(~Mask);
14625
14626 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14627 DAG.getConstant(Val, DL, MVT::i32),
14628 DAG.getConstant(Mask, DL, MVT::i32));
14629
14630 DCI.CombineTo(N, Res, false);
14631 // Return value from the original node to inform the combiner that N is
14632 // now dead.
14633 return SDValue(N, 0);
14634 }
14635 } else if (N1.getOpcode() == ISD::AND) {
14636 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14637 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14638 if (!N11C)
14639 return SDValue();
14640 unsigned Mask2 = N11C->getZExtValue();
14641
14642 // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI pattern
14643 // to match as is.
14644 if (ARM::isBitFieldInvertedMask(Mask) &&
14645 (Mask == ~Mask2)) {
14646 // The pack halfword instruction works better for masks that fit it,
14647 // so use that when it's available.
14648 if (Subtarget->hasDSP() &&
14649 (Mask == 0xffff || Mask == 0xffff0000))
14650 return SDValue();
14651 // 2a
14652 unsigned amt = llvm::countr_zero(Mask2);
14653 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14654 DAG.getConstant(amt, DL, MVT::i32));
14655 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14656 DAG.getConstant(Mask, DL, MVT::i32));
14657 DCI.CombineTo(N, Res, false);
14658 // Return value from the original node to inform the combiner that N is
14659 // now dead.
14660 return SDValue(N, 0);
14661 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14662 (~Mask == Mask2)) {
14663 // The pack halfword instruction works better for masks that fit it,
14664 // so use that when it's available.
14665 if (Subtarget->hasDSP() &&
14666 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14667 return SDValue();
14668 // 2b
14669 unsigned lsb = llvm::countr_zero(Mask);
14670 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14671 DAG.getConstant(lsb, DL, MVT::i32));
14672 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14673 DAG.getConstant(Mask2, DL, MVT::i32));
14674 DCI.CombineTo(N, Res, false);
14675 // Return value from the original node to inform the combiner that N is
14676 // now dead.
14677 return SDValue(N, 0);
14678 }
14679 }
14680
14681 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14682 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14683 ARM::isBitFieldInvertedMask(~Mask)) {
14684 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14685 // where lsb(mask) == #shamt and masked bits of B are known zero.
14686 SDValue ShAmt = N00.getOperand(1);
14687 unsigned ShAmtC = ShAmt->getAsZExtVal();
14688 unsigned LSB = llvm::countr_zero(Mask);
14689 if (ShAmtC != LSB)
14690 return SDValue();
14691
14692 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14693 DAG.getConstant(~Mask, DL, MVT::i32));
14694
14695 DCI.CombineTo(N, Res, false);
14696 // Return value from the original node to inform the combiner that N is
14697 // now dead.
14698 return SDValue(N, 0);
14699 }
14700
14701 return SDValue();
14702}
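// Illustrative example (not from the original source): with a mask of
// 0xffff00ff, the case (1) pattern
//   or (and A, 0xffff00ff), 0x00001a00
// has its constant confined to the byte that the mask clears, so it can be
// selected as a single BFI inserting 0x1a into bits [8..15] of A.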
14703
14704static bool isValidMVECond(unsigned CC, bool IsFloat) {
14705 switch (CC) {
14706 case ARMCC::EQ:
14707 case ARMCC::NE:
14708 case ARMCC::LE:
14709 case ARMCC::GT:
14710 case ARMCC::GE:
14711 case ARMCC::LT:
14712 return true;
14713 case ARMCC::HS:
14714 case ARMCC::HI:
14715 return !IsFloat;
14716 default:
14717 return false;
14718 };
14719}
14720
14721 static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14722 if (N->getOpcode() == ARMISD::VCMP)
14723 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14724 else if (N->getOpcode() == ARMISD::VCMPZ)
14725 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14726 else
14727 llvm_unreachable("Not a VCMP/VCMPZ!");
14728}
14729
14730 static bool CanInvertMVEVCMP(SDValue N) {
14731 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14732 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14733}
14734
14735 static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14736 const ARMSubtarget *Subtarget) {
14737 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14738 // together with predicates
14739 EVT VT = N->getValueType(0);
14740 SDLoc DL(N);
14741 SDValue N0 = N->getOperand(0);
14742 SDValue N1 = N->getOperand(1);
14743
14744 auto IsFreelyInvertable = [&](SDValue V) {
14745 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14746 return CanInvertMVEVCMP(V);
14747 return false;
14748 };
14749
14750 // At least one operand must be freely invertible.
14751 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14752 return SDValue();
14753
14754 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14755 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14756 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14757 return DAG.getLogicalNOT(DL, And, VT);
14758}
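// Illustrative sketch (not from the original source): for MVE predicates,
//   or (vcmp eq q0, q1), (vcmp gt q2, q3)
// becomes ~(~p0 & ~p1); the inner NOTs can later fold into the inverted
// compares (ne / le), leaving an AND of predicates that chains more easily
// under VPT blocks.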
14759
14760 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14761 static SDValue PerformORCombine(SDNode *N,
14762 TargetLowering::DAGCombinerInfo &DCI,
14763 const ARMSubtarget *Subtarget) {
14764 // Attempt to use immediate-form VORR
14765 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14766 SDLoc dl(N);
14767 EVT VT = N->getValueType(0);
14768 SelectionDAG &DAG = DCI.DAG;
14769
14770 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14771 return SDValue();
14772
14773 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14774 VT == MVT::v8i1 || VT == MVT::v16i1))
14775 return PerformORCombine_i1(N, DAG, Subtarget);
14776
14777 APInt SplatBits, SplatUndef;
14778 unsigned SplatBitSize;
14779 bool HasAnyUndefs;
14780 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14781 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14782 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14783 SplatBitSize == 64) {
14784 EVT VorrVT;
14785 SDValue Val =
14786 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14787 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14788 if (Val.getNode()) {
14789 SDValue Input =
14790 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
14791 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14792 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
14793 }
14794 }
14795 }
14796
14797 if (!Subtarget->isThumb1Only()) {
14798 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14799 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14800 return Result;
14801 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14802 return Result;
14803 }
14804
14805 SDValue N0 = N->getOperand(0);
14806 SDValue N1 = N->getOperand(1);
14807
14808 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
14809 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14810 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
14811
14812 // The code below optimizes (or (and X, Y), Z).
14813 // The AND operand needs to have a single user to make these optimizations
14814 // profitable.
14815 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14816 return SDValue();
14817
14818 APInt SplatUndef;
14819 unsigned SplatBitSize;
14820 bool HasAnyUndefs;
14821
14822 APInt SplatBits0, SplatBits1;
14823 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14824 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14825 // Ensure that the second operand of both ands are constants
14826 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14827 HasAnyUndefs) && !HasAnyUndefs) {
14828 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14829 HasAnyUndefs) && !HasAnyUndefs) {
14830 // Ensure that the bit width of the constants are the same and that
14831 // the splat arguments are logical inverses as per the pattern we
14832 // are trying to simplify.
14833 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14834 SplatBits0 == ~SplatBits1) {
14835 // Canonicalize the vector type to make instruction selection
14836 // simpler.
14837 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14838 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14839 N0->getOperand(1),
14840 N0->getOperand(0),
14841 N1->getOperand(0));
14842 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14843 }
14844 }
14845 }
14846 }
14847
14848 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14849 // reasonable.
14850 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14851 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14852 return Res;
14853 }
14854
14855 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14856 return Result;
14857
14858 return SDValue();
14859}
14860
14861 static SDValue PerformXORCombine(SDNode *N,
14862 TargetLowering::DAGCombinerInfo &DCI,
14863 const ARMSubtarget *Subtarget) {
14864 EVT VT = N->getValueType(0);
14865 SelectionDAG &DAG = DCI.DAG;
14866
14867 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14868 return SDValue();
14869
14870 if (!Subtarget->isThumb1Only()) {
14871 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14872 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14873 return Result;
14874
14875 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14876 return Result;
14877 }
14878
14879 if (Subtarget->hasMVEIntegerOps()) {
14880 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14881 SDValue N0 = N->getOperand(0);
14882 SDValue N1 = N->getOperand(1);
14883 const TargetLowering *TLI = Subtarget->getTargetLowering();
14884 if (TLI->isConstTrueVal(N1) &&
14885 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14886 if (CanInvertMVEVCMP(N0)) {
14887 SDLoc DL(N0);
14888 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14889
14890 SmallVector<SDValue, 4> Ops;
14891 Ops.push_back(N0->getOperand(0));
14892 if (N0->getOpcode() == ARMISD::VCMP)
14893 Ops.push_back(N0->getOperand(1));
14894 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14895 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14896 }
14897 }
14898 }
14899
14900 return SDValue();
14901}
14902
14903// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14904// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14905// their position in "to" (Rd).
14906static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14907 assert(N->getOpcode() == ARMISD::BFI);
14908
14909 SDValue From = N->getOperand(1);
14910 ToMask = ~N->getConstantOperandAPInt(2);
14911 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14912
14913 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14914 // #C in the base of the SHR.
14915 if (From->getOpcode() == ISD::SRL &&
14916 isa<ConstantSDNode>(From->getOperand(1))) {
14917 APInt Shift = From->getConstantOperandAPInt(1);
14918 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14919 FromMask <<= Shift.getLimitedValue(31);
14920 From = From->getOperand(0);
14921 }
14922
14923 return From;
14924}
14925
14926// If A and B contain one contiguous set of bits, does A | B == A . B?
14927//
14928 // Neither A nor B may be zero.
14929static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14930 unsigned LastActiveBitInA = A.countr_zero();
14931 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14932 return LastActiveBitInA - 1 == FirstActiveBitInB;
14933}
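// For example (illustrative): A = 0b0011'0000 and B = 0b0000'1100 concatenate
// properly (A's lowest set bit sits directly above B's highest set bit),
// whereas A = 0b0011'0000 and B = 0b0000'0011 leave a gap and do not.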
14934
14935 static SDValue FindBFIToCombineWith(SDNode *N) {
14936 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14937 APInt ToMask, FromMask;
14938 SDValue From = ParseBFI(N, ToMask, FromMask);
14939 SDValue To = N->getOperand(0);
14940
14941 SDValue V = To;
14942 if (V.getOpcode() != ARMISD::BFI)
14943 return SDValue();
14944
14945 APInt NewToMask, NewFromMask;
14946 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14947 if (NewFrom != From)
14948 return SDValue();
14949
14950 // Do the written bits conflict with any we've seen so far?
14951 if ((NewToMask & ToMask).getBoolValue())
14952 // Conflicting bits.
14953 return SDValue();
14954
14955 // Are the new bits contiguous when combined with the old bits?
14956 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14957 BitsProperlyConcatenate(FromMask, NewFromMask))
14958 return V;
14959 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14960 BitsProperlyConcatenate(NewFromMask, FromMask))
14961 return V;
14962
14963 return SDValue();
14964}
14965
14966 static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
14967 SDValue N0 = N->getOperand(0);
14968 SDValue N1 = N->getOperand(1);
14969
14970 if (N1.getOpcode() == ISD::AND) {
14971 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14972 // the bits being cleared by the AND are not demanded by the BFI.
14973 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14974 if (!N11C)
14975 return SDValue();
14976 unsigned InvMask = N->getConstantOperandVal(2);
14977 unsigned LSB = llvm::countr_zero(~InvMask);
14978 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14979 assert(Width <
14980 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14981 "undefined behavior");
14982 unsigned Mask = (1u << Width) - 1;
14983 unsigned Mask2 = N11C->getZExtValue();
14984 if ((Mask & (~Mask2)) == 0)
14985 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14986 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14987 return SDValue();
14988 }
14989
14990 // Look for another BFI to combine with.
14991 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14992 // We've found a BFI.
14993 APInt ToMask1, FromMask1;
14994 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14995
14996 APInt ToMask2, FromMask2;
14997 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14998 assert(From1 == From2);
14999 (void)From2;
15000
15001 // Create a new BFI, combining the two together.
15002 APInt NewFromMask = FromMask1 | FromMask2;
15003 APInt NewToMask = ToMask1 | ToMask2;
15004
15005 EVT VT = N->getValueType(0);
15006 SDLoc dl(N);
15007
15008 if (NewFromMask[0] == 0)
15009 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
15010 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
15011 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
15012 DAG.getConstant(~NewToMask, dl, VT));
15013 }
15014
15015 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
15016 // that lower bit insertions are performed first, provided that M1 and M2
15017 // do not overlap. This can allow multiple BFI instructions to be combined
15018 // together by the other folds above.
15019 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
15020 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
15021 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
15022
15023 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
15024 ToMask1.countl_zero() < ToMask2.countl_zero())
15025 return SDValue();
15026
15027 EVT VT = N->getValueType(0);
15028 SDLoc dl(N);
15029 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
15030 N->getOperand(1), N->getOperand(2));
15031 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
15032 N0.getOperand(2));
15033 }
15034
15035 return SDValue();
15036}
15037
15038// Check that N is CMPZ(CSINC(0, 0, CC, X)),
15039// or CMPZ(CMOV(1, 0, CC, X))
15040 // return X if valid.
15041 static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
15042 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
15043 return SDValue();
15044 SDValue CSInc = Cmp->getOperand(0);
15045
15046 // Ignore any `And 1` nodes that may not yet have been removed. We are
15047 // looking for a value that produces 1/0, so these have no effect on the
15048 // code.
15049 while (CSInc.getOpcode() == ISD::AND &&
15050 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
15051 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
15052 CSInc = CSInc.getOperand(0);
15053
15054 if (CSInc.getOpcode() == ARMISD::CSINC &&
15055 isNullConstant(CSInc.getOperand(0)) &&
15056 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15057 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
15058 return CSInc.getOperand(3);
15059 }
15060 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
15061 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15062 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
15063 return CSInc.getOperand(3);
15064 }
15065 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
15066 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15067 CC = ARMCC::getOppositeCondition(
15068 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
15069 return CSInc.getOperand(3);
15070 }
15071 return SDValue();
15072}
15073
15074 static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
15075 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
15076 // t92: flags = ARMISD::CMPZ t74, 0
15077 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15078 // t96: flags = ARMISD::CMPZ t93, 0
15079 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15080 ARMCC::CondCodes Cond;
15081 if (SDValue C = IsCMPZCSINC(N, Cond))
15082 if (Cond == ARMCC::EQ)
15083 return C;
15084 return SDValue();
15085}
15086
15087 static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
15088 // Fold away an unnecessary CMPZ/CSINC
15089 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15090 // if C1==EQ -> CSXYZ A, B, C2, D
15091 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15092 ARMCC::CondCodes Cond;
15093 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15094 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15095 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15096 N->getOperand(1),
15097 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15098 if (N->getConstantOperandVal(2) == ARMCC::NE)
15099 return DAG.getNode(
15100 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15101 N->getOperand(1),
15102 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
15103 }
15104 return SDValue();
15105}
15106
15107/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15108 /// ARMISD::VMOVRRD.
15109 static SDValue PerformVMOVRRDCombine(SDNode *N,
15110 TargetLowering::DAGCombinerInfo &DCI,
15111 const ARMSubtarget *Subtarget) {
15112 // vmovrrd(vmovdrr x, y) -> x,y
15113 SDValue InDouble = N->getOperand(0);
15114 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15115 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15116
15117 // vmovrrd(load f64) -> (load i32), (load i32)
15118 SDNode *InNode = InDouble.getNode();
15119 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15120 InNode->getValueType(0) == MVT::f64 &&
15121 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15122 !cast<LoadSDNode>(InNode)->isVolatile()) {
15123 // TODO: Should this be done for non-FrameIndex operands?
15124 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15125
15126 SelectionDAG &DAG = DCI.DAG;
15127 SDLoc DL(LD);
15128 SDValue BasePtr = LD->getBasePtr();
15129 SDValue NewLD1 =
15130 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15131 LD->getAlign(), LD->getMemOperand()->getFlags());
15132
15133 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15134 DAG.getConstant(4, DL, MVT::i32));
15135
15136 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15137 LD->getPointerInfo().getWithOffset(4),
15138 commonAlignment(LD->getAlign(), 4),
15139 LD->getMemOperand()->getFlags());
15140
15141 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15142 if (DCI.DAG.getDataLayout().isBigEndian())
15143 std::swap(NewLD1, NewLD2);
15144 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15145 return Result;
15146 }
15147
15148 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15149 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15150 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15151 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15152 SDValue BV = InDouble.getOperand(0);
15153 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15154 // change lane order under big endian.
15155 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15156 while (
15157 (BV.getOpcode() == ISD::BITCAST ||
15158 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15159 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15160 BVSwap = BV.getOpcode() == ISD::BITCAST;
15161 BV = BV.getOperand(0);
15162 }
15163 if (BV.getValueType() != MVT::v4i32)
15164 return SDValue();
15165
15166 // Handle buildvectors, pulling out the correct lane depending on
15167 // endianness.
15168 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15169 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15170 SDValue Op0 = BV.getOperand(Offset);
15171 SDValue Op1 = BV.getOperand(Offset + 1);
15172 if (!Subtarget->isLittle() && BVSwap)
15173 std::swap(Op0, Op1);
15174
15175 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15176 }
15177
15178 // A chain of insert_vectors, grabbing the correct value of the chain of
15179 // inserts.
15180 SDValue Op0, Op1;
15181 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15182 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15183 if (BV.getConstantOperandVal(2) == Offset && !Op0)
15184 Op0 = BV.getOperand(1);
15185 if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)
15186 Op1 = BV.getOperand(1);
15187 }
15188 BV = BV.getOperand(0);
15189 }
15190 if (!Subtarget->isLittle() && BVSwap)
15191 std::swap(Op0, Op1);
15192 if (Op0 && Op1)
15193 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15194 }
15195
15196 return SDValue();
15197}
15198
15199/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15200 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15201 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
15202 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15203 SDValue Op0 = N->getOperand(0);
15204 SDValue Op1 = N->getOperand(1);
15205 if (Op0.getOpcode() == ISD::BITCAST)
15206 Op0 = Op0.getOperand(0);
15207 if (Op1.getOpcode() == ISD::BITCAST)
15208 Op1 = Op1.getOperand(0);
15209 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15210 Op0.getNode() == Op1.getNode() &&
15211 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15212 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15213 N->getValueType(0), Op0.getOperand(0));
15214 return SDValue();
15215}
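// Illustrative example (not from the original source): if t0,t1 = VMOVRRD X
// splits an f64 into two GPRs, then VMOVDRR t0, t1 merely reassembles X, so
// the pair collapses to a plain bitcast of X.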
15216
15217 static SDValue PerformVMOVhrCombine(SDNode *N,
15218 TargetLowering::DAGCombinerInfo &DCI) {
15219 SDValue Op0 = N->getOperand(0);
15220
15221 // VMOVhr (VMOVrh (X)) -> X
15222 if (Op0->getOpcode() == ARMISD::VMOVrh)
15223 return Op0->getOperand(0);
15224
15225 // FullFP16: half values are passed in S-registers, and we don't
15226 // need any of the bitcast and moves:
15227 //
15228 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15229 // t5: i32 = bitcast t2
15230 // t18: f16 = ARMISD::VMOVhr t5
15231 // =>
15232 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15233 if (Op0->getOpcode() == ISD::BITCAST) {
15234 SDValue Copy = Op0->getOperand(0);
15235 if (Copy.getValueType() == MVT::f32 &&
15236 Copy->getOpcode() == ISD::CopyFromReg) {
15237 bool HasGlue = Copy->getNumOperands() == 3;
15238 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15239 HasGlue ? Copy->getOperand(2) : SDValue()};
15240 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15241 SDValue NewCopy =
15242 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(Copy),
15243 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15244 ArrayRef(Ops, HasGlue ? 3 : 2));
15245
15246 // Update Users, Chains, and Potential Glue.
15247 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15248 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15249 if (HasGlue)
15250 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15251 NewCopy.getValue(2));
15252
15253 return NewCopy;
15254 }
15255 }
15256
15257 // fold (VMOVhr (load x)) -> (load (f16*)x)
15258 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15259 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15260 LN0->getMemoryVT() == MVT::i16) {
15261 SDValue Load =
15262 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15263 LN0->getBasePtr(), LN0->getMemOperand());
15264 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15265 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15266 return Load;
15267 }
15268 }
15269
15270 // Only the bottom 16 bits of the source register are used.
15271 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15272 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15273 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15274 return SDValue(N, 0);
15275
15276 return SDValue();
15277}
15278
15279 static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {
15280 SDValue N0 = N->getOperand(0);
15281 EVT VT = N->getValueType(0);
15282
15283 // fold (VMOVrh (fpconst x)) -> const x
15284 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15285 APFloat V = C->getValueAPF();
15286 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15287 }
15288
15289 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15290 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15291 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15292
15293 SDValue Load =
15294 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15295 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15296 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15297 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15298 return Load;
15299 }
15300
15301 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15302 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15303 isa<ConstantSDNode>(N0->getOperand(1)))
15304 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15305 N0->getOperand(1));
15306
15307 return SDValue();
15308}
15309
15310/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15311/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15312/// i64 vector to have f64 elements, since the value can then be loaded
15313 /// directly into a VFP register.
15314 static bool hasNormalLoadOperand(SDNode *N) {
15315 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15316 for (unsigned i = 0; i < NumElts; ++i) {
15317 SDNode *Elt = N->getOperand(i).getNode();
15318 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15319 return true;
15320 }
15321 return false;
15322}
15323
15324/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15325 /// ISD::BUILD_VECTOR.
15326 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15327 TargetLowering::DAGCombinerInfo &DCI,
15328 const ARMSubtarget *Subtarget) {
15329 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15330 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15331 // into a pair of GPRs, which is fine when the value is used as a scalar,
15332 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15333 SelectionDAG &DAG = DCI.DAG;
15334 if (N->getNumOperands() == 2)
15335 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15336 return RV;
15337
15338 // Load i64 elements as f64 values so that type legalization does not split
15339 // them up into i32 values.
15340 EVT VT = N->getValueType(0);
15341 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15342 return SDValue();
15343 SDLoc dl(N);
15344 SmallVector<SDValue, 8> Ops;
15345 unsigned NumElts = VT.getVectorNumElements();
15346 for (unsigned i = 0; i < NumElts; ++i) {
15347 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15348 Ops.push_back(V);
15349 // Make the DAGCombiner fold the bitcast.
15350 DCI.AddToWorklist(V.getNode());
15351 }
15352 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15353 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15354 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15355}
15356
15357/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15358 static SDValue
15359 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15360 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15361 // At that time, we may have inserted bitcasts from integer to float.
15362 // If these bitcasts have survived DAGCombine, change the lowering of this
15363 // BUILD_VECTOR in something more vector friendly, i.e., that does not
15364 // force to use floating point types.
15365
15366 // Make sure we can change the type of the vector.
15367 // This is possible iff:
15368 // 1. The vector is only used in a bitcast to a integer type. I.e.,
15369 // 1.1. Vector is used only once.
15370 // 1.2. Use is a bit convert to an integer type.
15371 // 2. The size of its operands are 32-bits (64-bits are not legal).
15372 EVT VT = N->getValueType(0);
15373 EVT EltVT = VT.getVectorElementType();
15374
15375 // Check 1.1. and 2.
15376 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15377 return SDValue();
15378
15379 // By construction, the input type must be float.
15380 assert(EltVT == MVT::f32 && "Unexpected type!");
15381
15382 // Check 1.2.
15383 SDNode *Use = *N->user_begin();
15384 if (Use->getOpcode() != ISD::BITCAST ||
15385 Use->getValueType(0).isFloatingPoint())
15386 return SDValue();
15387
15388 // Check profitability.
15389 // Model is, if more than half of the relevant operands are bitcast from
15390 // i32, turn the build_vector into a sequence of insert_vector_elt.
15391 // Relevant operands are everything that is not statically
15392 // (i.e., at compile time) bitcasted.
15393 unsigned NumOfBitCastedElts = 0;
15394 unsigned NumElts = VT.getVectorNumElements();
15395 unsigned NumOfRelevantElts = NumElts;
15396 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15397 SDValue Elt = N->getOperand(Idx);
15398 if (Elt->getOpcode() == ISD::BITCAST) {
15399 // Assume only bit cast to i32 will go away.
15400 if (Elt->getOperand(0).getValueType() == MVT::i32)
15401 ++NumOfBitCastedElts;
15402 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15403 // Constants are statically casted, thus do not count them as
15404 // relevant operands.
15405 --NumOfRelevantElts;
15406 }
15407
15408 // Check if more than half of the elements require a non-free bitcast.
15409 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15410 return SDValue();
15411
15412 SelectionDAG &DAG = DCI.DAG;
15413 // Create the new vector type.
15414 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15415 // Check if the type is legal.
15416 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15417 if (!TLI.isTypeLegal(VecVT))
15418 return SDValue();
15419
15420 // Combine:
15421 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15422 // => BITCAST INSERT_VECTOR_ELT
15423 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15424 // (BITCAST EN), N.
15425 SDValue Vec = DAG.getUNDEF(VecVT);
15426 SDLoc dl(N);
15427 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15428 SDValue V = N->getOperand(Idx);
15429 if (V.isUndef())
15430 continue;
15431 if (V.getOpcode() == ISD::BITCAST &&
15432 V->getOperand(0).getValueType() == MVT::i32)
15433 // Fold obvious case.
15434 V = V.getOperand(0);
15435 else {
15436 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15437 // Make the DAGCombiner fold the bitcasts.
15438 DCI.AddToWorklist(V.getNode());
15439 }
15440 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15441 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15442 }
15443 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15444 // Make the DAGCombiner fold the bitcasts.
15445 DCI.AddToWorklist(Vec.getNode());
15446 return Vec;
15447}
15448
15449 static SDValue
15450 PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15451 EVT VT = N->getValueType(0);
15452 SDValue Op = N->getOperand(0);
15453 SDLoc dl(N);
15454
15455 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15456 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15457 // If the valuetypes are the same, we can remove the cast entirely.
15458 if (Op->getOperand(0).getValueType() == VT)
15459 return Op->getOperand(0);
15460 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15461 }
15462
15463 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15464 // more VPNOT which might get folded as else predicates.
15465 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15466 SDValue X =
15467 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15468 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15469 DCI.DAG.getConstant(65535, dl, MVT::i32));
15470 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15471 }
15472
15473 // Only the bottom 16 bits of the source register are used.
15474 if (Op.getValueType() == MVT::i32) {
15475 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15476 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15477 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15478 return SDValue(N, 0);
15479 }
15480 return SDValue();
15481}
15482
15483 static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
15484 const ARMSubtarget *ST) {
15485 EVT VT = N->getValueType(0);
15486 SDValue Op = N->getOperand(0);
15487 SDLoc dl(N);
15488
15489 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15490 if (ST->isLittle())
15491 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15492
15493 // VT VECTOR_REG_CAST (VT Op) -> Op
15494 if (Op.getValueType() == VT)
15495 return Op;
15496 // VECTOR_REG_CAST undef -> undef
15497 if (Op.isUndef())
15498 return DAG.getUNDEF(VT);
15499
15500 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15501 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15502 // If the valuetypes are the same, we can remove the cast entirely.
15503 if (Op->getOperand(0).getValueType() == VT)
15504 return Op->getOperand(0);
15505 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15506 }
15507
15508 return SDValue();
15509}
15510
15511 static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
15512 const ARMSubtarget *Subtarget) {
15513 if (!Subtarget->hasMVEIntegerOps())
15514 return SDValue();
15515
15516 EVT VT = N->getValueType(0);
15517 SDValue Op0 = N->getOperand(0);
15518 SDValue Op1 = N->getOperand(1);
15519 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15520 SDLoc dl(N);
15521
15522 // vcmp X, 0, cc -> vcmpz X, cc
15523 if (isZeroVector(Op1))
15524 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15525
15526 unsigned SwappedCond = getSwappedCondition(Cond);
15527 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15528 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15529 if (isZeroVector(Op0))
15530 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15531 DAG.getConstant(SwappedCond, dl, MVT::i32));
15532 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15533 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15534 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15535 DAG.getConstant(SwappedCond, dl, MVT::i32));
15536 }
15537
15538 return SDValue();
15539}
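// Illustrative examples (not from the original source):
//   vcmp q0, zeroinitializer, eq   -> vcmpz q0, eq
//   vcmp (vdup r0), q1, gt         -> vcmp q1, (vdup r0), lt
// assuming the swapped condition is valid for the element type, as checked
// above.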
15540
15541/// PerformInsertEltCombine - Target-specific dag combine xforms for
15542 /// ISD::INSERT_VECTOR_ELT.
15543 static SDValue PerformInsertEltCombine(SDNode *N,
15544 TargetLowering::DAGCombinerInfo &DCI) {
15545 // Bitcast an i64 load inserted into a vector to f64.
15546 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15547 EVT VT = N->getValueType(0);
15548 SDNode *Elt = N->getOperand(1).getNode();
15549 if (VT.getVectorElementType() != MVT::i64 ||
15550 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15551 return SDValue();
15552
15553 SelectionDAG &DAG = DCI.DAG;
15554 SDLoc dl(N);
15555 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15556 VT.getVectorNumElements());
15557 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15558 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15559 // Make the DAGCombiner fold the bitcasts.
15560 DCI.AddToWorklist(Vec.getNode());
15561 DCI.AddToWorklist(V.getNode());
15562 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15563 Vec, V, N->getOperand(2));
15564 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15565}
15566
15567// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15568// directly or bitcast to an integer if the original is a float vector.
15569// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15570// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15571 static SDValue
15572 PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15573 EVT VT = N->getValueType(0);
15574 SDLoc dl(N);
15575
15576 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15577 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15578 return SDValue();
15579
15580 SDValue Ext = SDValue(N, 0);
15581 if (Ext.getOpcode() == ISD::BITCAST &&
15582 Ext.getOperand(0).getValueType() == MVT::f32)
15583 Ext = Ext.getOperand(0);
15584 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15585 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15586 Ext.getConstantOperandVal(1) % 2 != 0)
15587 return SDValue();
15588 if (Ext->hasOneUse() && (Ext->user_begin()->getOpcode() == ISD::SINT_TO_FP ||
15589 Ext->user_begin()->getOpcode() == ISD::UINT_TO_FP))
15590 return SDValue();
15591
15592 SDValue Op0 = Ext.getOperand(0);
15593 EVT VecVT = Op0.getValueType();
15594 unsigned ResNo = Op0.getResNo();
15595 unsigned Lane = Ext.getConstantOperandVal(1);
15596 if (VecVT.getVectorNumElements() != 4)
15597 return SDValue();
15598
15599 // Find another extract, of Lane + 1
15600 auto OtherIt = find_if(Op0->users(), [&](SDNode *V) {
15601 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15602 isa<ConstantSDNode>(V->getOperand(1)) &&
15603 V->getConstantOperandVal(1) == Lane + 1 &&
15604 V->getOperand(0).getResNo() == ResNo;
15605 });
15606 if (OtherIt == Op0->users().end())
15607 return SDValue();
15608
15609 // For float extracts, we need to be converting to a i32 for both vector
15610 // lanes.
15611 SDValue OtherExt(*OtherIt, 0);
15612 if (OtherExt.getValueType() != MVT::i32) {
15613 if (!OtherExt->hasOneUse() ||
15614 OtherExt->user_begin()->getOpcode() != ISD::BITCAST ||
15615 OtherExt->user_begin()->getValueType(0) != MVT::i32)
15616 return SDValue();
15617 OtherExt = SDValue(*OtherExt->user_begin(), 0);
15618 }
15619
15620 // Convert the type to a f64 and extract with a VMOVRRD.
15621 SDValue F64 = DCI.DAG.getNode(
15622 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15623 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15624 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15625 SDValue VMOVRRD =
15626 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15627
15628 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15629 return VMOVRRD;
15630}
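// Illustrative example (not from the original source): for a v4i32 vector x,
// extract(x, 2) and extract(x, 3) are replaced by the two i32 results of
//   VMOVRRD (extract_elt (v2f64 vector_reg_cast x), 1)
// so both lanes come back in one GPR pair.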
15631
15632 static SDValue PerformExtractEltCombine(SDNode *N,
15633 TargetLowering::DAGCombinerInfo &DCI,
15634 const ARMSubtarget *ST) {
15635 SDValue Op0 = N->getOperand(0);
15636 EVT VT = N->getValueType(0);
15637 SDLoc dl(N);
15638
15639 // extract (vdup x) -> x
15640 if (Op0->getOpcode() == ARMISD::VDUP) {
15641 SDValue X = Op0->getOperand(0);
15642 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15643 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15644 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15645 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15646 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15647 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15648
15649 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15650 X = X->getOperand(0);
15651 if (X.getValueType() == VT)
15652 return X;
15653 }
15654
15655 // extract ARM_BUILD_VECTOR -> x
15656 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15657 isa<ConstantSDNode>(N->getOperand(1)) &&
15658 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15659 return Op0.getOperand(N->getConstantOperandVal(1));
15660 }
15661
15662 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15663 if (Op0.getValueType() == MVT::v4i32 &&
15664 isa<ConstantSDNode>(N->getOperand(1)) &&
15665 Op0.getOpcode() == ISD::BITCAST &&
15666 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15667 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15668 SDValue BV = Op0.getOperand(0);
15669 unsigned Offset = N->getConstantOperandVal(1);
15670 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15671 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15672 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15673 }
15674
15675 // extract x, n; extract x, n+1 -> VMOVRRD x
15676 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15677 return R;
15678
15679 // extract (MVETrunc(x)) -> extract x
15680 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15681 unsigned Idx = N->getConstantOperandVal(1);
15682 unsigned Vec =
15683 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15684 unsigned SubIdx =
15685 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15686 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15687 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15688 }
15689
15690 return SDValue();
15691}
15692
15693 static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
15694 SDValue Op = N->getOperand(0);
15695 EVT VT = N->getValueType(0);
15696
15697 // sext_inreg(VGETLANEu) -> VGETLANEs
15698 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15699 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15700 Op.getOperand(0).getValueType().getScalarType())
15701 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15702 Op.getOperand(1));
15703
15704 return SDValue();
15705}
15706
15707 static SDValue
15708 PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15709 SDValue Vec = N->getOperand(0);
15710 SDValue SubVec = N->getOperand(1);
15711 uint64_t IdxVal = N->getConstantOperandVal(2);
15712 EVT VecVT = Vec.getValueType();
15713 EVT SubVT = SubVec.getValueType();
15714
15715 // Only do this for legal fixed vector types.
15716 if (!VecVT.isFixedLengthVector() ||
15717 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15718 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15719 return SDValue();
15720
15721 // Ignore widening patterns.
15722 if (IdxVal == 0 && Vec.isUndef())
15723 return SDValue();
15724
15725 // Subvector must be half the width and an "aligned" insertion.
15726 unsigned NumSubElts = SubVT.getVectorNumElements();
15727 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15728 (IdxVal != 0 && IdxVal != NumSubElts))
15729 return SDValue();
15730
15731 // Fold insert_subvector -> concat_vectors
15732 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15733 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15734 SDLoc DL(N);
15735 SDValue Lo, Hi;
15736 if (IdxVal == 0) {
15737 Lo = SubVec;
15738 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15739 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15740 } else {
15741 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15742 DCI.DAG.getVectorIdxConstant(0, DL));
15743 Hi = SubVec;
15744 }
15745 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15746}
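// Illustrative example (not from the original source): with v8i16 Vec and
// v4i16 Sub,
//   insert_subvector(Vec, Sub, 4)
//     -> concat_vectors(extract_subvector(Vec, 0), Sub)
// and an insertion at index 0 produces the mirrored concat.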
15747
15748 // shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15749 static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15750 SelectionDAG &DAG) {
15751 SDValue Trunc = N->getOperand(0);
15752 EVT VT = Trunc.getValueType();
15753 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15754 return SDValue();
15755
15756 SDLoc DL(Trunc);
15757 if (isVMOVNTruncMask(N->getMask(), VT, false))
15758 return DAG.getNode(
15759 ARMISD::VMOVN, DL, VT,
15760 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15761 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15762 DAG.getConstant(1, DL, MVT::i32));
15763 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15764 return DAG.getNode(
15765 ARMISD::VMOVN, DL, VT,
15766 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15767 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15768 DAG.getConstant(1, DL, MVT::i32));
15769 return SDValue();
15770}
15771
15772/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15773 /// ISD::VECTOR_SHUFFLE.
15774 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15775 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15776 return R;
15777
15778 // The LLVM shufflevector instruction does not require the shuffle mask
15779 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15780 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15781 // operands do not match the mask length, they are extended by concatenating
15782 // them with undef vectors. That is probably the right thing for other
15783 // targets, but for NEON it is better to concatenate two double-register
15784 // size vector operands into a single quad-register size vector. Do that
15785 // transformation here:
15786 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15787 // shuffle(concat(v1, v2), undef)
15788 SDValue Op0 = N->getOperand(0);
15789 SDValue Op1 = N->getOperand(1);
15790 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15791 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15792 Op0.getNumOperands() != 2 ||
15793 Op1.getNumOperands() != 2)
15794 return SDValue();
15795 SDValue Concat0Op1 = Op0.getOperand(1);
15796 SDValue Concat1Op1 = Op1.getOperand(1);
15797 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15798 return SDValue();
15799 // Skip the transformation if any of the types are illegal.
15800 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15801 EVT VT = N->getValueType(0);
15802 if (!TLI.isTypeLegal(VT) ||
15803 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15804 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15805 return SDValue();
15806
15807 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15808 Op0.getOperand(0), Op1.getOperand(0));
15809 // Translate the shuffle mask.
15810 SmallVector<int, 16> NewMask;
15811 unsigned NumElts = VT.getVectorNumElements();
15812 unsigned HalfElts = NumElts/2;
15813 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15814 for (unsigned n = 0; n < NumElts; ++n) {
15815 int MaskElt = SVN->getMaskElt(n);
15816 int NewElt = -1;
15817 if (MaskElt < (int)HalfElts)
15818 NewElt = MaskElt;
15819 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15820 NewElt = HalfElts + MaskElt - NumElts;
15821 NewMask.push_back(NewElt);
15822 }
15823 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15824 DAG.getUNDEF(VT), NewMask);
15825}
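// Illustrative example (not from the original source): for v4i16 inputs v1
// and v2,
//   shuffle(concat(v1, undef), concat(v2, undef), <0,1,2,3,8,9,10,11>)
//     -> shuffle(concat(v1, v2), undef, <0,1,2,3,4,5,6,7>)
// keeping both halves in a single quad register.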
15826
15827/// Load/store instruction that can be merged with a base address
15828/// update
15829 struct BaseUpdateTarget {
15830 SDNode *N;
15831 bool isIntrinsic;
15832 bool isStore;
15833 unsigned AddrOpIdx;
15834 };
15835
15836 struct BaseUpdateUser {
15837 /// Instruction that updates a pointer
15838 SDNode *N;
15839 /// Pointer increment operand
15840 SDValue Inc;
15841 /// Pointer increment value if it is a constant, or 0 otherwise
15842 unsigned ConstInc;
15843 };
15844
15845 static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15846 struct BaseUpdateUser &User,
15847 bool SimpleConstIncOnly,
15848 TargetLowering::DAGCombinerInfo &DCI) {
15849 SelectionDAG &DAG = DCI.DAG;
15850 SDNode *N = Target.N;
15851 MemSDNode *MemN = cast<MemSDNode>(N);
15852 SDLoc dl(N);
15853
15854 // Find the new opcode for the updating load/store.
15855 bool isLoadOp = true;
15856 bool isLaneOp = false;
15857 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15858 // as an operand.
15859 bool hasAlignment = true;
15860 unsigned NewOpc = 0;
15861 unsigned NumVecs = 0;
15862 if (Target.isIntrinsic) {
15863 unsigned IntNo = N->getConstantOperandVal(1);
15864 switch (IntNo) {
15865 default:
15866 llvm_unreachable("unexpected intrinsic for Neon base update");
15867 case Intrinsic::arm_neon_vld1:
15868 NewOpc = ARMISD::VLD1_UPD;
15869 NumVecs = 1;
15870 break;
15871 case Intrinsic::arm_neon_vld2:
15872 NewOpc = ARMISD::VLD2_UPD;
15873 NumVecs = 2;
15874 break;
15875 case Intrinsic::arm_neon_vld3:
15876 NewOpc = ARMISD::VLD3_UPD;
15877 NumVecs = 3;
15878 break;
15879 case Intrinsic::arm_neon_vld4:
15880 NewOpc = ARMISD::VLD4_UPD;
15881 NumVecs = 4;
15882 break;
15883 case Intrinsic::arm_neon_vld1x2:
15884 NewOpc = ARMISD::VLD1x2_UPD;
15885 NumVecs = 2;
15886 hasAlignment = false;
15887 break;
15888 case Intrinsic::arm_neon_vld1x3:
15889 NewOpc = ARMISD::VLD1x3_UPD;
15890 NumVecs = 3;
15891 hasAlignment = false;
15892 break;
15893 case Intrinsic::arm_neon_vld1x4:
15894 NewOpc = ARMISD::VLD1x4_UPD;
15895 NumVecs = 4;
15896 hasAlignment = false;
15897 break;
15898 case Intrinsic::arm_neon_vld2dup:
15899 NewOpc = ARMISD::VLD2DUP_UPD;
15900 NumVecs = 2;
15901 break;
15902 case Intrinsic::arm_neon_vld3dup:
15903 NewOpc = ARMISD::VLD3DUP_UPD;
15904 NumVecs = 3;
15905 break;
15906 case Intrinsic::arm_neon_vld4dup:
15907 NewOpc = ARMISD::VLD4DUP_UPD;
15908 NumVecs = 4;
15909 break;
15910 case Intrinsic::arm_neon_vld2lane:
15911 NewOpc = ARMISD::VLD2LN_UPD;
15912 NumVecs = 2;
15913 isLaneOp = true;
15914 break;
15915 case Intrinsic::arm_neon_vld3lane:
15916 NewOpc = ARMISD::VLD3LN_UPD;
15917 NumVecs = 3;
15918 isLaneOp = true;
15919 break;
15920 case Intrinsic::arm_neon_vld4lane:
15921 NewOpc = ARMISD::VLD4LN_UPD;
15922 NumVecs = 4;
15923 isLaneOp = true;
15924 break;
15925 case Intrinsic::arm_neon_vst1:
15926 NewOpc = ARMISD::VST1_UPD;
15927 NumVecs = 1;
15928 isLoadOp = false;
15929 break;
15930 case Intrinsic::arm_neon_vst2:
15931 NewOpc = ARMISD::VST2_UPD;
15932 NumVecs = 2;
15933 isLoadOp = false;
15934 break;
15935 case Intrinsic::arm_neon_vst3:
15936 NewOpc = ARMISD::VST3_UPD;
15937 NumVecs = 3;
15938 isLoadOp = false;
15939 break;
15940 case Intrinsic::arm_neon_vst4:
15941 NewOpc = ARMISD::VST4_UPD;
15942 NumVecs = 4;
15943 isLoadOp = false;
15944 break;
15945 case Intrinsic::arm_neon_vst2lane:
15946 NewOpc = ARMISD::VST2LN_UPD;
15947 NumVecs = 2;
15948 isLoadOp = false;
15949 isLaneOp = true;
15950 break;
15951 case Intrinsic::arm_neon_vst3lane:
15952 NewOpc = ARMISD::VST3LN_UPD;
15953 NumVecs = 3;
15954 isLoadOp = false;
15955 isLaneOp = true;
15956 break;
15957 case Intrinsic::arm_neon_vst4lane:
15958 NewOpc = ARMISD::VST4LN_UPD;
15959 NumVecs = 4;
15960 isLoadOp = false;
15961 isLaneOp = true;
15962 break;
15963 case Intrinsic::arm_neon_vst1x2:
15964 NewOpc = ARMISD::VST1x2_UPD;
15965 NumVecs = 2;
15966 isLoadOp = false;
15967 hasAlignment = false;
15968 break;
15969 case Intrinsic::arm_neon_vst1x3:
15970 NewOpc = ARMISD::VST1x3_UPD;
15971 NumVecs = 3;
15972 isLoadOp = false;
15973 hasAlignment = false;
15974 break;
15975 case Intrinsic::arm_neon_vst1x4:
15976 NewOpc = ARMISD::VST1x4_UPD;
15977 NumVecs = 4;
15978 isLoadOp = false;
15979 hasAlignment = false;
15980 break;
15981 }
15982 } else {
15983 isLaneOp = true;
15984 switch (N->getOpcode()) {
15985 default:
15986 llvm_unreachable("unexpected opcode for Neon base update");
15987 case ARMISD::VLD1DUP:
15988 NewOpc = ARMISD::VLD1DUP_UPD;
15989 NumVecs = 1;
15990 break;
15991 case ARMISD::VLD2DUP:
15992 NewOpc = ARMISD::VLD2DUP_UPD;
15993 NumVecs = 2;
15994 break;
15995 case ARMISD::VLD3DUP:
15996 NewOpc = ARMISD::VLD3DUP_UPD;
15997 NumVecs = 3;
15998 break;
15999 case ARMISD::VLD4DUP:
16000 NewOpc = ARMISD::VLD4DUP_UPD;
16001 NumVecs = 4;
16002 break;
16003 case ISD::LOAD:
16004 NewOpc = ARMISD::VLD1_UPD;
16005 NumVecs = 1;
16006 isLaneOp = false;
16007 break;
16008 case ISD::STORE:
16009 NewOpc = ARMISD::VST1_UPD;
16010 NumVecs = 1;
16011 isLaneOp = false;
16012 isLoadOp = false;
16013 break;
16014 }
16015 }
16016
16017 // Find the size of memory referenced by the load/store.
16018 EVT VecTy;
16019 if (isLoadOp) {
16020 VecTy = N->getValueType(0);
16021 } else if (Target.isIntrinsic) {
16022 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
16023 } else {
16024 assert(Target.isStore &&
16025 "Node has to be a load, a store, or an intrinsic!");
16026 VecTy = N->getOperand(1).getValueType();
16027 }
16028
16029 bool isVLDDUPOp =
16030 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
16031 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
16032
16033 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16034 if (isLaneOp || isVLDDUPOp)
16035 NumBytes /= VecTy.getVectorNumElements();
16036
16037 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
16038 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
16039 // separate instructions that make it harder to use a non-constant update.
16040 return false;
16041 }
16042
16043 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
16044 return false;
16045
16046 // OK, we found an ADD we can fold into the base update.
16047 // Now, create a _UPD node, taking care of not breaking alignment.
16048
16049 EVT AlignedVecTy = VecTy;
16050 Align Alignment = MemN->getAlign();
16051
16052 // If this is a less-than-standard-aligned load/store, change the type to
16053 // match the standard alignment.
16054 // The alignment is overlooked when selecting _UPD variants; and it's
16055 // easier to introduce bitcasts here than fix that.
16056 // There are 3 ways to get to this base-update combine:
16057 // - intrinsics: they are assumed to be properly aligned (to the standard
16058 // alignment of the memory type), so we don't need to do anything.
16059 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16060 // intrinsics, so, likewise, there's nothing to do.
16061 // - generic load/store instructions: the alignment is specified as an
16062 // explicit operand, rather than implicitly as the standard alignment
16063 // of the memory type (like the intrinsics). We need to change the
16064 // memory type to match the explicit alignment. That way, we don't
16065 // generate non-standard-aligned ARMISD::VLDx nodes.
16066 if (isa<LSBaseSDNode>(N)) {
16067 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
16068 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
16069 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16070 assert(!isLaneOp && "Unexpected generic load/store lane.");
16071 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
16072 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
16073 }
16074 // Don't set an explicit alignment on regular load/stores that we want
16075 // to transform to VLD/VST 1_UPD nodes.
16076 // This matches the behavior of regular load/stores, which only get an
16077 // explicit alignment if the MMO alignment is larger than the standard
16078 // alignment of the memory type.
16079 // Intrinsics, however, always get an explicit alignment, set to the
16080 // alignment of the MMO.
16081 Alignment = Align(1);
16082 }
16083
16084 // Create the new updating load/store node.
16085 // First, create an SDVTList for the new updating node's results.
16086 EVT Tys[6];
16087 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16088 unsigned n;
16089 for (n = 0; n < NumResultVecs; ++n)
16090 Tys[n] = AlignedVecTy;
16091 Tys[n++] = MVT::i32;
16092 Tys[n] = MVT::Other;
16093 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16094
16095 // Then, gather the new node's operands.
16096 SmallVector<SDValue, 8> Ops;
16097 Ops.push_back(N->getOperand(0)); // incoming chain
16098 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16099 Ops.push_back(User.Inc);
16100
16101 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16102 // Try to match the intrinsic's signature
16103 Ops.push_back(StN->getValue());
16104 } else {
16105 // Loads (and of course intrinsics) match the intrinsics' signature,
16106 // so just add all but the alignment operand.
16107 unsigned LastOperand =
16108 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16109 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16110 Ops.push_back(N->getOperand(i));
16111 }
16112
16113 // For all node types, the alignment operand is always the last one.
16114 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16115
16116 // If this is a non-standard-aligned STORE, the penultimate operand is the
16117 // stored value. Bitcast it to the aligned type.
16118 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16119 SDValue &StVal = Ops[Ops.size() - 2];
16120 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16121 }
16122
16123 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16124 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16125 MemN->getMemOperand());
16126
16127 // Update the uses.
16128 SmallVector<SDValue, 5> NewResults;
16129 for (unsigned i = 0; i < NumResultVecs; ++i)
16130 NewResults.push_back(SDValue(UpdN.getNode(), i));
16131
16132 // If this is a non-standard-aligned LOAD, the first result is the loaded
16133 // value. Bitcast it to the expected result type.
16134 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16135 SDValue &LdVal = NewResults[0];
16136 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16137 }
16138
16139 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16140 DCI.CombineTo(N, NewResults);
16141 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16142
16143 return true;
16144}
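// Illustrative sketch (not from the original source): a plain NEON load
// followed by a matching pointer bump, e.g.
//   vld1.32 {d0}, [r0]
//   add     r0, r0, #8
// is rewritten into the writeback form vld1.32 {d0}, [r0]! once the increment
// equals the access size.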
16145
16146 // If (opcode ptr inc) is an ADD-like instruction, return the
16147// increment value. Otherwise return 0.
16148static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16149 SDValue Inc, const SelectionDAG &DAG) {
16150 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16151 if (!CInc)
16152 return 0;
16153
16154 switch (Opcode) {
16155 case ARMISD::VLD1_UPD:
16156 case ISD::ADD:
16157 return CInc->getZExtValue();
16158 case ISD::OR: {
16159 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16160 // (OR ptr inc) is the same as (ADD ptr inc)
16161 return CInc->getZExtValue();
16162 }
16163 return 0;
16164 }
16165 default:
16166 return 0;
16167 }
16168}
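// For example (illustrative): an ADD of the pointer with constant 16 reports
// an increment of 16; an (or p, 4) whose operands share no common set bits is
// treated like (add p, 4); any other user reports 0.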
16169
16170 static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16171 switch (N->getOpcode()) {
16172 case ISD::ADD:
16173 case ISD::OR: {
16174 if (isa<ConstantSDNode>(N->getOperand(1))) {
16175 *Ptr = N->getOperand(0);
16176 *CInc = N->getOperand(1);
16177 return true;
16178 }
16179 return false;
16180 }
16181 case ARMISD::VLD1_UPD: {
16182 if (isa<ConstantSDNode>(N->getOperand(2))) {
16183 *Ptr = N->getOperand(1);
16184 *CInc = N->getOperand(2);
16185 return true;
16186 }
16187 return false;
16188 }
16189 default:
16190 return false;
16191 }
16192}
16193
16194 static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16195 // Check that the add is independent of the load/store.
16196 // Otherwise, folding it would create a cycle. Search through Addr
16197 // as well, since the User may not be a direct user of Addr and
16198 // only share a base pointer.
16199 SmallPtrSet<const SDNode *, 32> Visited;
16200 SmallVector<const SDNode *, 16> Worklist;
16201 Worklist.push_back(N);
16202 Worklist.push_back(User);
16203 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16204 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16205 return false;
16206 return true;
16207}
16208
16209/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16210/// NEON load/store intrinsics, and generic vector load/stores, to merge
16211/// base address updates.
16212/// For generic load/stores, the memory type is assumed to be a vector.
16213 /// The caller is assumed to have checked legality.
16214 static SDValue CombineBaseUpdate(SDNode *N,
16215 TargetLowering::DAGCombinerInfo &DCI) {
16216 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16217 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16218 const bool isStore = N->getOpcode() == ISD::STORE;
16219 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16220 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16221
16222 SDValue Addr = N->getOperand(AddrOpIdx);
16223
16224 SmallVector<BaseUpdateUser, 8> BaseUpdates;
16225
16226 // Search for a use of the address operand that is an increment.
16227 for (SDUse &Use : Addr->uses()) {
16228 SDNode *User = Use.getUser();
16229 if (Use.getResNo() != Addr.getResNo() || User->getNumOperands() != 2)
16230 continue;
16231
16232 SDValue Inc = User->getOperand(Use.getOperandNo() == 1 ? 0 : 1);
16233 unsigned ConstInc =
16234 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16235
16236 if (ConstInc || User->getOpcode() == ISD::ADD)
16237 BaseUpdates.push_back({User, Inc, ConstInc});
16238 }
16239
16240 // If the address is a constant pointer increment itself, find
16241 // another constant increment that has the same base operand
16242 SDValue Base;
16243 SDValue CInc;
16244 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16245 unsigned Offset =
16246 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16247 for (SDUse &Use : Base->uses()) {
16248
16249 SDNode *User = Use.getUser();
16250 if (Use.getResNo() != Base.getResNo() || User == Addr.getNode() ||
16251 User->getNumOperands() != 2)
16252 continue;
16253
16254 SDValue UserInc = User->getOperand(Use.getOperandNo() == 0 ? 1 : 0);
16255 unsigned UserOffset =
16256 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16257
16258 if (!UserOffset || UserOffset <= Offset)
16259 continue;
16260
16261 unsigned NewConstInc = UserOffset - Offset;
16262 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16263 BaseUpdates.push_back({User, NewInc, NewConstInc});
16264 }
16265 }
16266
16267 // Try to fold the load/store with an update that matches memory
16268 // access size. This should work well for sequential loads.
16269 //
16270 // Filter out invalid updates as well.
16271 unsigned NumValidUpd = BaseUpdates.size();
16272 for (unsigned I = 0; I < NumValidUpd;) {
16273 BaseUpdateUser &User = BaseUpdates[I];
16274 if (!isValidBaseUpdate(N, User.N)) {
16275 --NumValidUpd;
16276 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16277 continue;
16278 }
16279
16280 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16281 return SDValue();
16282 ++I;
16283 }
16284 BaseUpdates.resize(NumValidUpd);
16285
16286 // Try to fold with other users. Non-constant updates are considered
16287 // first, and constant updates are sorted to not break a sequence of
16288 // strided accesses (if there is any).
16289 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16290 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16291 return LHS.ConstInc < RHS.ConstInc;
16292 });
16293 for (BaseUpdateUser &User : BaseUpdates) {
16294 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16295 return SDValue();
16296 }
16297 return SDValue();
16298}
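// As an illustration of the combine above (approximate NEON assembly; the
// exact registers and addressing forms depend on selection):
//   vld1.32 {d16}, [r0]
//   add     r0, r0, #8
// can become the post-incrementing form
//   vld1.32 {d16}, [r0]!
// when the constant increment matches the access size; otherwise a
// register-increment writeback form may be tried instead.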
16299
16300 static SDValue PerformVLDCombine(SDNode *N,
16301 TargetLowering::DAGCombinerInfo &DCI) {
16302 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16303 return SDValue();
16304
16305 return CombineBaseUpdate(N, DCI);
16306}
16307
16308 static SDValue PerformMVEVLDCombine(SDNode *N,
16309 TargetLowering::DAGCombinerInfo &DCI) {
16310 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16311 return SDValue();
16312
16313 SelectionDAG &DAG = DCI.DAG;
16314 SDValue Addr = N->getOperand(2);
16315 MemSDNode *MemN = cast<MemSDNode>(N);
16316 SDLoc dl(N);
16317
16318 // For the stores, where there are multiple intrinsics, we only actually want
16319 // to post-inc the last of them.
16320 unsigned IntNo = N->getConstantOperandVal(1);
16321 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16322 return SDValue();
16323 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16324 return SDValue();
16325
16326 // Search for a use of the address operand that is an increment.
16327 for (SDUse &Use : Addr->uses()) {
16328 SDNode *User = Use.getUser();
16329 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
16330 continue;
16331
16332 // Check that the add is independent of the load/store. Otherwise, folding
16333 // it would create a cycle. We can avoid searching through Addr as it's a
16334 // predecessor to both.
16335 SmallPtrSet<const SDNode *, 32> Visited;
16336 SmallVector<const SDNode *, 16> Worklist;
16337 Visited.insert(Addr.getNode());
16338 Worklist.push_back(N);
16339 Worklist.push_back(User);
16340 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16341 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16342 continue;
16343
16344 // Find the new opcode for the updating load/store.
16345 bool isLoadOp = true;
16346 unsigned NewOpc = 0;
16347 unsigned NumVecs = 0;
16348 switch (IntNo) {
16349 default:
16350 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16351 case Intrinsic::arm_mve_vld2q:
16352 NewOpc = ARMISD::VLD2_UPD;
16353 NumVecs = 2;
16354 break;
16355 case Intrinsic::arm_mve_vld4q:
16356 NewOpc = ARMISD::VLD4_UPD;
16357 NumVecs = 4;
16358 break;
16359 case Intrinsic::arm_mve_vst2q:
16360 NewOpc = ARMISD::VST2_UPD;
16361 NumVecs = 2;
16362 isLoadOp = false;
16363 break;
16364 case Intrinsic::arm_mve_vst4q:
16365 NewOpc = ARMISD::VST4_UPD;
16366 NumVecs = 4;
16367 isLoadOp = false;
16368 break;
16369 }
16370
16371 // Find the size of memory referenced by the load/store.
16372 EVT VecTy;
16373 if (isLoadOp) {
16374 VecTy = N->getValueType(0);
16375 } else {
16376 VecTy = N->getOperand(3).getValueType();
16377 }
16378
16379 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16380
16381 // If the increment is a constant, it must match the memory ref size.
16382 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16383 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16384 if (!CInc || CInc->getZExtValue() != NumBytes)
16385 continue;
16386
16387 // Create the new updating load/store node.
16388 // First, create an SDVTList for the new updating node's results.
16389 EVT Tys[6];
16390 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16391 unsigned n;
16392 for (n = 0; n < NumResultVecs; ++n)
16393 Tys[n] = VecTy;
16394 Tys[n++] = MVT::i32;
16395 Tys[n] = MVT::Other;
16396 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16397
16398 // Then, gather the new node's operands.
16399 SmallVector<SDValue, 8> Ops;
16400 Ops.push_back(N->getOperand(0)); // incoming chain
16401 Ops.push_back(N->getOperand(2)); // ptr
16402 Ops.push_back(Inc);
16403
16404 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16405 Ops.push_back(N->getOperand(i));
16406
16407 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16408 MemN->getMemOperand());
16409
16410 // Update the uses.
16411 SmallVector<SDValue, 5> NewResults;
16412 for (unsigned i = 0; i < NumResultVecs; ++i)
16413 NewResults.push_back(SDValue(UpdN.getNode(), i));
16414
16415 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16416 DCI.CombineTo(N, NewResults);
16417 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16418
16419 break;
16420 }
16421
16422 return SDValue();
16423}
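// For example (sketch, MVE): an arm_mve_vld2q of v4i32 whose address is also
// advanced by a separate "add rN, rN, #32" (2 vectors * 16 bytes) is rewritten
// to VLD2_UPD, so the increment comes out of the load's writeback result.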
16424
16425/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16426/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16427/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16428/// return true.
16429 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16430 SelectionDAG &DAG = DCI.DAG;
16431 EVT VT = N->getValueType(0);
16432 // vldN-dup instructions only support 64-bit vectors for N > 1.
16433 if (!VT.is64BitVector())
16434 return false;
16435
16436 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16437 SDNode *VLD = N->getOperand(0).getNode();
16438 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16439 return false;
16440 unsigned NumVecs = 0;
16441 unsigned NewOpc = 0;
16442 unsigned IntNo = VLD->getConstantOperandVal(1);
16443 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16444 NumVecs = 2;
16445 NewOpc = ARMISD::VLD2DUP;
16446 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16447 NumVecs = 3;
16448 NewOpc = ARMISD::VLD3DUP;
16449 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16450 NumVecs = 4;
16451 NewOpc = ARMISD::VLD4DUP;
16452 } else {
16453 return false;
16454 }
16455
16456 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16457 // numbers match the load.
16458 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16459 for (SDUse &Use : VLD->uses()) {
16460 // Ignore uses of the chain result.
16461 if (Use.getResNo() == NumVecs)
16462 continue;
16463 SDNode *User = Use.getUser();
16464 if (User->getOpcode() != ARMISD::VDUPLANE ||
16465 VLDLaneNo != User->getConstantOperandVal(1))
16466 return false;
16467 }
16468
16469 // Create the vldN-dup node.
16470 EVT Tys[5];
16471 unsigned n;
16472 for (n = 0; n < NumVecs; ++n)
16473 Tys[n] = VT;
16474 Tys[n] = MVT::Other;
16475 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16476 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16477 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16478 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16479 Ops, VLDMemInt->getMemoryVT(),
16480 VLDMemInt->getMemOperand());
16481
16482 // Update the uses.
16483 for (SDUse &Use : VLD->uses()) {
16484 unsigned ResNo = Use.getResNo();
16485 // Ignore uses of the chain result.
16486 if (ResNo == NumVecs)
16487 continue;
16488 DCI.CombineTo(Use.getUser(), SDValue(VLDDup.getNode(), ResNo));
16489 }
16490
16491 // Now the vldN-lane intrinsic is dead except for its chain result.
16492 // Update uses of the chain.
16493 std::vector<SDValue> VLDDupResults;
16494 for (unsigned n = 0; n < NumVecs; ++n)
16495 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16496 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16497 DCI.CombineTo(VLD, VLDDupResults);
16498
16499 return true;
16500}
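// Rough shape of what this matches (sketch): a vld2lane/vld3lane/vld4lane
// intrinsic whose vector results are each only used by VDUPLANE with the same
// lane index, which becomes a single all-lanes load such as
//   vld2.8 {d16[], d17[]}, [r0]
// instead of a lane load followed by per-register duplicates.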
16501
16502/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16503/// ARMISD::VDUPLANE.
16504 static SDValue PerformVDUPLANECombine(SDNode *N,
16505 TargetLowering::DAGCombinerInfo &DCI,
16506 const ARMSubtarget *Subtarget) {
16507 SDValue Op = N->getOperand(0);
16508 EVT VT = N->getValueType(0);
16509
16510 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16511 if (Subtarget->hasMVEIntegerOps()) {
16512 EVT ExtractVT = VT.getVectorElementType();
16513 // We need to ensure we are creating a legal type.
16514 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16515 ExtractVT = MVT::i32;
16516 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16517 N->getOperand(0), N->getOperand(1));
16518 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16519 }
16520
16521 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16522 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16523 if (CombineVLDDUP(N, DCI))
16524 return SDValue(N, 0);
16525
16526 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16527 // redundant. Ignore bit_converts for now; element sizes are checked below.
16528 while (Op.getOpcode() == ISD::BITCAST)
16529 Op = Op.getOperand(0);
16530 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16531 return SDValue();
16532
16533 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16534 unsigned EltSize = Op.getScalarValueSizeInBits();
16535 // The canonical VMOV for a zero vector uses a 32-bit element size.
16536 unsigned Imm = Op.getConstantOperandVal(0);
16537 unsigned EltBits;
16538 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16539 EltSize = 8;
16540 if (EltSize > VT.getScalarSizeInBits())
16541 return SDValue();
16542
16543 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16544}
16545
16546/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16547 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16548 const ARMSubtarget *Subtarget) {
16549 SDValue Op = N->getOperand(0);
16550 SDLoc dl(N);
16551
16552 if (Subtarget->hasMVEIntegerOps()) {
16553 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16554 // need to come from a GPR.
16555 if (Op.getValueType() == MVT::f32)
16556 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16557 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16558 else if (Op.getValueType() == MVT::f16)
16559 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16560 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16561 }
16562
16563 if (!Subtarget->hasNEON())
16564 return SDValue();
16565
16566 // Match VDUP(LOAD) -> VLD1DUP.
16567 // We match this pattern here rather than waiting for isel because the
16568 // transform is only legal for unindexed loads.
16569 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16570 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16571 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16572 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16573 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16574 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16575 SDValue VLDDup =
16576 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16577 LD->getMemoryVT(), LD->getMemOperand());
16578 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16579 return VLDDup;
16580 }
16581
16582 return SDValue();
16583}
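// Illustration of the VDUP(LOAD) case above (approximate NEON assembly):
//   ldr     r1, [r0]
//   vdup.32 q8, r1
// becomes a load-and-duplicate,
//   vld1.32 {d16[], d17[]}, [r0]
// assuming the load is unindexed and only feeds the VDUP.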
16584
16585 static SDValue PerformLOADCombine(SDNode *N,
16586 TargetLowering::DAGCombinerInfo &DCI,
16587 const ARMSubtarget *Subtarget) {
16588 EVT VT = N->getValueType(0);
16589
16590 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16591 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16592 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16593 return CombineBaseUpdate(N, DCI);
16594
16595 return SDValue();
16596}
16597
16598// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16599// pack all of the elements in one place. Next, store to memory in fewer
16600// chunks.
16601 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16602 SelectionDAG &DAG) {
16603 SDValue StVal = St->getValue();
16604 EVT VT = StVal.getValueType();
16605 if (!St->isTruncatingStore() || !VT.isVector())
16606 return SDValue();
16607 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16608 EVT StVT = St->getMemoryVT();
16609 unsigned NumElems = VT.getVectorNumElements();
16610 assert(StVT != VT && "Cannot truncate to the same type");
16611 unsigned FromEltSz = VT.getScalarSizeInBits();
16612 unsigned ToEltSz = StVT.getScalarSizeInBits();
16613
16614 // From, To sizes and ElemCount must be pow of two
16615 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16616 return SDValue();
16617
16618 // We are going to use the original vector elt for storing.
16619 // Accumulated smaller vector elements must be a multiple of the store size.
16620 if (0 != (NumElems * FromEltSz) % ToEltSz)
16621 return SDValue();
16622
16623 unsigned SizeRatio = FromEltSz / ToEltSz;
16624 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16625
16626 // Create a type on which we perform the shuffle.
16627 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16628 NumElems * SizeRatio);
16629 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16630
16631 SDLoc DL(St);
16632 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16633 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16634 for (unsigned i = 0; i < NumElems; ++i)
16635 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16636 : i * SizeRatio;
16637
16638 // Can't shuffle using an illegal type.
16639 if (!TLI.isTypeLegal(WideVecVT))
16640 return SDValue();
16641
16642 SDValue Shuff = DAG.getVectorShuffle(
16643 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16644 // At this point all of the data is stored at the bottom of the
16645 // register. We now need to save it to mem.
16646
16647 // Find the largest store unit
16648 MVT StoreType = MVT::i8;
16649 for (MVT Tp : MVT::integer_valuetypes()) {
16650 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16651 StoreType = Tp;
16652 }
16653 // Didn't find a legal store type.
16654 if (!TLI.isTypeLegal(StoreType))
16655 return SDValue();
16656
16657 // Bitcast the original vector into a vector of store-size units
16658 EVT StoreVecVT =
16659 EVT::getVectorVT(*DAG.getContext(), StoreType,
16660 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16661 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16662 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16663 SmallVector<SDValue, 8> Chains;
16664 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16665 TLI.getPointerTy(DAG.getDataLayout()));
16666 SDValue BasePtr = St->getBasePtr();
16667
16668 // Perform one or more big stores into memory.
16669 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16670 for (unsigned I = 0; I < E; I++) {
16671 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16672 ShuffWide, DAG.getIntPtrConstant(I, DL));
16673 SDValue Ch =
16674 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16675 St->getAlign(), St->getMemOperand()->getFlags());
16676 BasePtr =
16677 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16678 Chains.push_back(Ch);
16679 }
16680 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16681}
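// Worked example (little-endian sketch): a truncating store of <4 x i32> to
// <4 x i16> is bitcast to v8i16, shuffled so lanes 0,2,4,6 land in the low
// half of the register, and then written out with a single i64 store rather
// than four lane-by-lane halfword stores.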
16682
16683// Try taking a single vector store from an fpround (which would otherwise turn
16684// into an expensive buildvector) and splitting it into a series of narrowing
16685// stores.
16686 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16687 SelectionDAG &DAG) {
16688 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16689 return SDValue();
16690 SDValue Trunc = St->getValue();
16691 if (Trunc->getOpcode() != ISD::FP_ROUND)
16692 return SDValue();
16693 EVT FromVT = Trunc->getOperand(0).getValueType();
16694 EVT ToVT = Trunc.getValueType();
16695 if (!ToVT.isVector())
16696 return SDValue();
16698 EVT ToEltVT = ToVT.getVectorElementType();
16699 EVT FromEltVT = FromVT.getVectorElementType();
16700
16701 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16702 return SDValue();
16703
16704 unsigned NumElements = 4;
16705 if (FromVT.getVectorNumElements() % NumElements != 0)
16706 return SDValue();
16707
16708 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16709 // use the VMOVN over splitting the store. We are looking for patterns of:
16710 // !rev: 0 N 1 N+1 2 N+2 ...
16711 // rev: N 0 N+1 1 N+2 2 ...
16712 // The shuffle may either be a single source (in which case N = NumElts/2) or
16713 // two inputs extended with concat to the same size (in which case N =
16714 // NumElts).
16715 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16716 ArrayRef<int> M = SVN->getMask();
16717 unsigned NumElts = ToVT.getVectorNumElements();
16718 if (SVN->getOperand(1).isUndef())
16719 NumElts /= 2;
16720
16721 unsigned Off0 = Rev ? NumElts : 0;
16722 unsigned Off1 = Rev ? 0 : NumElts;
16723
16724 for (unsigned I = 0; I < NumElts; I += 2) {
16725 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16726 return false;
16727 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16728 return false;
16729 }
16730
16731 return true;
16732 };
16733
16734 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16735 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16736 return SDValue();
16737
16738 LLVMContext &C = *DAG.getContext();
16739 SDLoc DL(St);
16740 // Details about the old store
16741 SDValue Ch = St->getChain();
16742 SDValue BasePtr = St->getBasePtr();
16743 Align Alignment = St->getOriginalAlign();
16744 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16745 AAMDNodes AAInfo = St->getAAInfo();
16746
16747 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16748 // and then stored as truncating integer stores.
16749 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16750 EVT NewToVT = EVT::getVectorVT(
16751 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16752
16753 SmallVector<SDValue, 4> Stores;
16754 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16755 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16756 SDValue NewPtr =
16757 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16758
16759 SDValue Extract =
16760 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16761 DAG.getConstant(i * NumElements, DL, MVT::i32));
16762
16763 SDValue FPTrunc =
16764 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16765 Extract, DAG.getConstant(0, DL, MVT::i32));
16766 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16767
16768 SDValue Store = DAG.getTruncStore(
16769 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16770 NewToVT, Alignment, MMOFlags, AAInfo);
16771 Stores.push_back(Store);
16772 }
16773 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16774}
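// Sketch: a store of (fp_round <8 x float> to <8 x half>) is split into two
// halves; each v4f32 half is narrowed with a VCVT into the bottom f16 lanes
// and written with a v4i16 truncating store at byte offsets 0 and 8, instead
// of building the whole v8f16 value first.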
16775
16776// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16777// into an expensive buildvector) and splitting it into a series of narrowing
16778// stores.
16779 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16780 SelectionDAG &DAG) {
16781 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16782 return SDValue();
16783 SDValue Trunc = St->getValue();
16784 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16785 return SDValue();
16786 EVT FromVT = Trunc->getOperand(0).getValueType();
16787 EVT ToVT = Trunc.getValueType();
16788
16789 LLVMContext &C = *DAG.getContext();
16790 SDLoc DL(St);
16791 // Details about the old store
16792 SDValue Ch = St->getChain();
16793 SDValue BasePtr = St->getBasePtr();
16794 Align Alignment = St->getOriginalAlign();
16795 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16796 AAMDNodes AAInfo = St->getAAInfo();
16797
16798 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16799 FromVT.getVectorNumElements());
16800
16801 SmallVector<SDValue, 4> Stores;
16802 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16803 unsigned NewOffset =
16804 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16805 SDValue NewPtr =
16806 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16807
16808 SDValue Extract = Trunc.getOperand(i);
16809 SDValue Store = DAG.getTruncStore(
16810 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16811 NewToVT, Alignment, MMOFlags, AAInfo);
16812 Stores.push_back(Store);
16813 }
16814 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16815}
16816
16817// Given a floating point store from an extracted vector, with an integer
16818// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16819// help reduce fp register pressure, doesn't require the fp extract and allows
16820// use of more integer post-inc stores not available with vstr.
16821 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16822 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16823 return SDValue();
16824 SDValue Extract = St->getValue();
16825 EVT VT = Extract.getValueType();
16826 // For now only uses f16. This may be useful for f32 too, but that will
16827 // be bitcast(extract), not the VGETLANEu we currently check here.
16828 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16829 return SDValue();
16830
16831 SDNode *GetLane =
16832 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16833 {Extract.getOperand(0), Extract.getOperand(1)});
16834 if (!GetLane)
16835 return SDValue();
16836
16837 LLVMContext &C = *DAG.getContext();
16838 SDLoc DL(St);
16839 // Create a new integer store to replace the existing floating point version.
16840 SDValue Ch = St->getChain();
16841 SDValue BasePtr = St->getBasePtr();
16842 Align Alignment = St->getOriginalAlign();
16843 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16844 AAMDNodes AAInfo = St->getAAInfo();
16845 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16846 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16847 St->getPointerInfo(), NewToVT, Alignment,
16848 MMOFlags, AAInfo);
16849
16850 return Store;
16851}
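// For instance (sketch): if the DAG already has t = VGETLANEu(%q, lane) for an
// integer user, then "store f16 (extractelement %q, lane)" is rewritten as a
// 16-bit truncating integer store of t, avoiding the FP extract entirely and
// enabling integer post-increment store addressing.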
16852
16853/// PerformSTORECombine - Target-specific dag combine xforms for
16854/// ISD::STORE.
16855 static SDValue PerformSTORECombine(SDNode *N,
16856 TargetLowering::DAGCombinerInfo &DCI,
16857 const ARMSubtarget *Subtarget) {
16858 StoreSDNode *St = cast<StoreSDNode>(N);
16859 if (St->isVolatile())
16860 return SDValue();
16861 SDValue StVal = St->getValue();
16862 EVT VT = StVal.getValueType();
16863
16864 if (Subtarget->hasNEON())
16865 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16866 return Store;
16867
16868 if (Subtarget->hasMVEFloatOps())
16869 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16870 return NewToken;
16871
16872 if (Subtarget->hasMVEIntegerOps()) {
16873 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16874 return NewChain;
16875 if (SDValue NewToken =
16876 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16877 return NewToken;
16878 }
16879
16880 if (!ISD::isNormalStore(St))
16881 return SDValue();
16882
16883 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16884 // ARM stores of arguments in the same cache line.
16885 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16886 StVal.getNode()->hasOneUse()) {
16887 SelectionDAG &DAG = DCI.DAG;
16888 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16889 SDLoc DL(St);
16890 SDValue BasePtr = St->getBasePtr();
16891 SDValue NewST1 = DAG.getStore(
16892 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16893 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16894 St->getMemOperand()->getFlags());
16895
16896 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16897 DAG.getConstant(4, DL, MVT::i32));
16898 return DAG.getStore(NewST1.getValue(0), DL,
16899 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16900 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16901 St->getOriginalAlign(),
16902 St->getMemOperand()->getFlags());
16903 }
16904
16905 if (StVal.getValueType() == MVT::i64 &&
16906 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16907
16908 // Bitcast an i64 store extracted from a vector to f64.
16909 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16910 SelectionDAG &DAG = DCI.DAG;
16911 SDLoc dl(StVal);
16912 SDValue IntVec = StVal.getOperand(0);
16913 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16915 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16916 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16917 Vec, StVal.getOperand(1));
16918 dl = SDLoc(N);
16919 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16920 // Make the DAGCombiner fold the bitcasts.
16921 DCI.AddToWorklist(Vec.getNode());
16922 DCI.AddToWorklist(ExtElt.getNode());
16923 DCI.AddToWorklist(V.getNode());
16924 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16925 St->getPointerInfo(), St->getAlign(),
16926 St->getMemOperand()->getFlags(), St->getAAInfo());
16927 }
16928
16929 // If this is a legal vector store, try to combine it into a VST1_UPD.
16930 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16931 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16932 return CombineBaseUpdate(N, DCI);
16933
16934 return SDValue();
16935}
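// Illustration of the VMOVDRR split above (approximate): storing a 64-bit
// value built from two GPRs,
//   vmov d16, r0, r1
//   vstr d16, [sp]
// becomes two plain integer stores,
//   str r0, [sp]
//   str r1, [sp, #4]
// (operands swapped on big-endian), avoiding the GPR-to-NEON transfer.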
16936
16937/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16938/// can replace combinations of VMUL and VCVT (floating-point to integer)
16939/// when the VMUL has a constant operand that is a power of 2.
16940///
16941/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16942/// vmul.f32 d16, d17, d16
16943/// vcvt.s32.f32 d16, d16
16944/// becomes:
16945/// vcvt.s32.f32 d16, d16, #3
16946 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16947 const ARMSubtarget *Subtarget) {
16948 if (!Subtarget->hasNEON())
16949 return SDValue();
16950
16951 SDValue Op = N->getOperand(0);
16952 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16953 Op.getOpcode() != ISD::FMUL)
16954 return SDValue();
16955
16956 SDValue ConstVec = Op->getOperand(1);
16957 if (!isa<BuildVectorSDNode>(ConstVec))
16958 return SDValue();
16959
16960 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16961 uint32_t FloatBits = FloatTy.getSizeInBits();
16962 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16963 uint32_t IntBits = IntTy.getSizeInBits();
16964 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16965 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16966 // These instructions only exist converting from f32 to i32. We can handle
16967 // smaller integers by generating an extra truncate, but larger ones would
16968 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16969 // these instructions only support v2i32/v4i32 types.
16970 return SDValue();
16971 }
16972
16973 BitVector UndefElements;
16974 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16975 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16976 if (C == -1 || C == 0 || C > 32)
16977 return SDValue();
16978
16979 SDLoc dl(N);
16980 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16981 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16982 Intrinsic::arm_neon_vcvtfp2fxu;
16983 SDValue FixConv = DAG.getNode(
16984 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16985 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16986 DAG.getConstant(C, dl, MVT::i32));
16987
16988 if (IntBits < FloatBits)
16989 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16990
16991 return FixConv;
16992}
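// A source-level sketch of code that reaches this form (illustrative only):
//   for (int i = 0; i < 4; i++)
//     b[i] = (int)(a[i] * 8.0f);   // scale by 2^3
// compiles to the fixed-point "vcvt.s32.f32 dN, dN, #3" instead of a vmul
// followed by a plain vcvt, as in the example in the comment above.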
16993
16994 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16995 const ARMSubtarget *Subtarget) {
16996 if (!Subtarget->hasMVEFloatOps())
16997 return SDValue();
16998
16999 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
17000 // The second form can be more easily turned into a predicated vadd, and
17001 // possibly combined into a fma to become a predicated vfma.
17002 SDValue Op0 = N->getOperand(0);
17003 SDValue Op1 = N->getOperand(1);
17004 EVT VT = N->getValueType(0);
17005 SDLoc DL(N);
17006
17007 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
17008 // which these VMOV's represent.
17009 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
17010 if (Op.getOpcode() != ISD::BITCAST ||
17011 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
17012 return false;
17013 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
17014 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
17015 return true;
17016 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
17017 return true;
17018 return false;
17019 };
17020
17021 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
17022 std::swap(Op0, Op1);
17023
17024 if (Op1.getOpcode() != ISD::VSELECT)
17025 return SDValue();
17026
17027 SDNodeFlags FaddFlags = N->getFlags();
17028 bool NSZ = FaddFlags.hasNoSignedZeros();
17029 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
17030 return SDValue();
17031
17032 SDValue FAdd =
17033 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
17034 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
17035}
17036
17037 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
17038 SDValue LHS = N->getOperand(0);
17039 SDValue RHS = N->getOperand(1);
17040 EVT VT = N->getValueType(0);
17041 SDLoc DL(N);
17042
17043 if (!N->getFlags().hasAllowReassociation())
17044 return SDValue();
17045
17046 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
17047 auto ReassocComplex = [&](SDValue A, SDValue B) {
17048 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
17049 return SDValue();
17050 unsigned Opc = A.getConstantOperandVal(0);
17051 if (Opc != Intrinsic::arm_mve_vcmlaq)
17052 return SDValue();
17053 SDValue VCMLA = DAG.getNode(
17054 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
17055 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17056 A.getOperand(3), A.getOperand(4));
17057 VCMLA->setFlags(A->getFlags());
17058 return VCMLA;
17059 };
17060 if (SDValue R = ReassocComplex(LHS, RHS))
17061 return R;
17062 if (SDValue R = ReassocComplex(RHS, LHS))
17063 return R;
17064
17065 return SDValue();
17066}
17067
17068 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
17069 const ARMSubtarget *Subtarget) {
17070 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17071 return S;
17072 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17073 return S;
17074 return SDValue();
17075}
17076
17077/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17078/// can replace combinations of VCVT (integer to floating-point) and VMUL
17079/// when the VMUL has a constant operand that is a power of 2.
17080///
17081/// Example (assume d17 = <float 0.125, float 0.125>):
17082/// vcvt.f32.s32 d16, d16
17083/// vmul.f32 d16, d16, d17
17084/// becomes:
17085/// vcvt.f32.s32 d16, d16, #3
17086 static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG,
17087 const ARMSubtarget *Subtarget) {
17088 if (!Subtarget->hasNEON())
17089 return SDValue();
17090
17091 SDValue Op = N->getOperand(0);
17092 unsigned OpOpcode = Op.getNode()->getOpcode();
17093 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17094 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17095 return SDValue();
17096
17097 SDValue ConstVec = N->getOperand(1);
17098 if (!isa<BuildVectorSDNode>(ConstVec))
17099 return SDValue();
17100
17101 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17102 uint32_t FloatBits = FloatTy.getSizeInBits();
17103 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17104 uint32_t IntBits = IntTy.getSizeInBits();
17105 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17106 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17107 // These instructions only exist converting from i32 to f32. We can handle
17108 // smaller integers by generating an extra extend, but larger ones would
17109 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17110 // these instructions only support v2i32/v4i32 types.
17111 return SDValue();
17112 }
17113
17114 ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);
17115 APFloat Recip(0.0f);
17116 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17117 return SDValue();
17118
17119 bool IsExact;
17120 APSInt IntVal(33);
17121 if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
17122 APFloat::opOK ||
17123 !IsExact)
17124 return SDValue();
17125
17126 int32_t C = IntVal.exactLogBase2();
17127 if (C == -1 || C == 0 || C > 32)
17128 return SDValue();
17129
17130 SDLoc DL(N);
17131 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17132 SDValue ConvInput = Op.getOperand(0);
17133 if (IntBits < FloatBits)
17134 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
17135 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
17136
17137 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
17138 : Intrinsic::arm_neon_vcvtfxu2fp;
17139 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
17140 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
17141 DAG.getConstant(C, DL, MVT::i32));
17142}
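// Mirror of the previous combine (sketch): the constant must be an exact
// reciprocal power of two, e.g. multiplying a converted <4 x i32> by
// splat(0.125f) becomes "vcvt.f32.s32 qN, qN, #3" with no separate vmul.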
17143
17144 static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17145 const ARMSubtarget *ST) {
17146 if (!ST->hasMVEIntegerOps())
17147 return SDValue();
17148
17149 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17150 EVT ResVT = N->getValueType(0);
17151 SDValue N0 = N->getOperand(0);
17152 SDLoc dl(N);
17153
17154 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17155 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17156 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17157 N0.getValueType() == MVT::v16i8)) {
17158 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17159 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17160 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17161 }
17162
17163 // We are looking for something that will have illegal types if left alone,
17164 // but that we can convert to a single instruction under MVE. For example
17165 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17166 // or
17167 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17168
17169 // The legal cases are:
17170 // VADDV u/s 8/16/32
17171 // VMLAV u/s 8/16/32
17172 // VADDLV u/s 32
17173 // VMLALV u/s 16/32
17174
17175 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17176 // extend it and use v4i32 instead.
17177 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17178 EVT AVT = A.getValueType();
17179 return any_of(ExtTypes, [&](MVT Ty) {
17180 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17181 AVT.bitsLE(Ty);
17182 });
17183 };
17184 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17185 EVT AVT = A.getValueType();
17186 if (!AVT.is128BitVector())
17187 A = DAG.getNode(ExtendCode, dl,
17188 AVT.changeVectorElementType(MVT::getIntegerVT(
17189 128 / AVT.getVectorMinNumElements())),
17190 A);
17191 return A;
17192 };
17193 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17194 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17195 return SDValue();
17196 SDValue A = N0->getOperand(0);
17197 if (ExtTypeMatches(A, ExtTypes))
17198 return ExtendIfNeeded(A, ExtendCode);
17199 return SDValue();
17200 };
17201 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17202 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17203 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17204 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17205 return SDValue();
17206 Mask = N0->getOperand(0);
17207 SDValue Ext = N0->getOperand(1);
17208 if (Ext->getOpcode() != ExtendCode)
17209 return SDValue();
17210 SDValue A = Ext->getOperand(0);
17211 if (ExtTypeMatches(A, ExtTypes))
17212 return ExtendIfNeeded(A, ExtendCode);
17213 return SDValue();
17214 };
17215 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17216 SDValue &A, SDValue &B) {
17217 // For a vmla we are trying to match a larger pattern:
17218 // ExtA = sext/zext A
17219 // ExtB = sext/zext B
17220 // Mul = mul ExtA, ExtB
17221 // vecreduce.add Mul
17222 // There might also be an extra extend between the mul and the addreduce, so
17223 // long as the bitwidth is high enough to make them equivalent (for example
17224 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17225 if (ResVT != RetTy)
17226 return false;
17227 SDValue Mul = N0;
17228 if (Mul->getOpcode() == ExtendCode &&
17229 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17230 ResVT.getScalarSizeInBits())
17231 Mul = Mul->getOperand(0);
17232 if (Mul->getOpcode() != ISD::MUL)
17233 return false;
17234 SDValue ExtA = Mul->getOperand(0);
17235 SDValue ExtB = Mul->getOperand(1);
17236 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17237 return false;
17238 A = ExtA->getOperand(0);
17239 B = ExtB->getOperand(0);
17240 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17241 A = ExtendIfNeeded(A, ExtendCode);
17242 B = ExtendIfNeeded(B, ExtendCode);
17243 return true;
17244 }
17245 return false;
17246 };
17247 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17248 SDValue &A, SDValue &B, SDValue &Mask) {
17249 // Same as the pattern above with a select for the zero predicated lanes
17250 // ExtA = sext/zext A
17251 // ExtB = sext/zext B
17252 // Mul = mul ExtA, ExtB
17253 // N0 = select Mask, Mul, 0
17254 // vecreduce.add N0
17255 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17256 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17257 return false;
17258 Mask = N0->getOperand(0);
17259 SDValue Mul = N0->getOperand(1);
17260 if (Mul->getOpcode() == ExtendCode &&
17261 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17262 ResVT.getScalarSizeInBits())
17263 Mul = Mul->getOperand(0);
17264 if (Mul->getOpcode() != ISD::MUL)
17265 return false;
17266 SDValue ExtA = Mul->getOperand(0);
17267 SDValue ExtB = Mul->getOperand(1);
17268 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17269 return false;
17270 A = ExtA->getOperand(0);
17271 B = ExtB->getOperand(0);
17272 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17273 A = ExtendIfNeeded(A, ExtendCode);
17274 B = ExtendIfNeeded(B, ExtendCode);
17275 return true;
17276 }
17277 return false;
17278 };
17279 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17280 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17281 // reductions. The operands are extended with MVEEXT, but as they are
17282 // reductions the lane orders do not matter. MVEEXT may be combined with
17283 // loads to produce two extending loads, or else they will be expanded to
17284 // VREV/VMOVL.
17285 EVT VT = Ops[0].getValueType();
17286 if (VT == MVT::v16i8) {
17287 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17288 "Unexpected illegal long reduction opcode");
17289 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17290
17291 SDValue Ext0 =
17292 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17293 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17294 SDValue Ext1 =
17295 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17296 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17297
17298 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17299 Ext0, Ext1);
17300 SDValue MLA1 =
17301 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17302 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17303 Ext0.getValue(1), Ext1.getValue(1));
17304 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17305 }
17306 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17307 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17308 SDValue(Node.getNode(), 1));
17309 };
17310
17311 SDValue A, B;
17312 SDValue Mask;
17313 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17314 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17315 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17316 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17317 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17318 A, B))
17319 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17320 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17321 A, B))
17322 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17323 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17324 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17325 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17326 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17327 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17328 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17329
17330 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17331 Mask))
17332 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17333 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17334 Mask))
17335 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17336 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17337 Mask))
17338 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17339 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17340 Mask))
17341 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17342 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17343 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17344 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17345 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17346 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17347 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17348
17349 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17350 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17351 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17352 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17353 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17354 return Create64bitNode(ARMISD::VADDLVs, {A});
17355 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17356 return Create64bitNode(ARMISD::VADDLVu, {A});
17357 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17358 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17359 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17360 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17361 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17362 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17363
17364 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17365 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17366 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17367 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17368 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17369 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17370 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17371 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17372 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17373 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17374 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17375 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17376 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17377 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17378
17379 // Some complications. We can get a case where the two inputs of the mul are
17380 // the same, in which case the output sext will have been helpfully converted
17381 // to a zext. Turn it back.
17382 SDValue Op = N0;
17383 if (Op->getOpcode() == ISD::VSELECT)
17384 Op = Op->getOperand(1);
17385 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17386 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17387 SDValue Mul = Op->getOperand(0);
17388 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17389 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17390 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17391 if (Op != N0)
17392 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17393 N0->getOperand(0), Ext, N0->getOperand(2));
17394 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17395 }
17396 }
17397
17398 return SDValue();
17399}
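// Example of the VMLAV path above (IR sketch, MVE):
//   %xe = sext <16 x i8> %x to <16 x i32>
//   %ye = sext <16 x i8> %y to <16 x i32>
//   %m  = mul <16 x i32> %xe, %ye
//   %r  = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m)
// matches IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, ...) and becomes a single
// VMLADAV.s8 rather than being widened to illegal vector types.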
17400
17401// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17402// the lanes are used. Due to the reduction being commutative the shuffle can be
17403// removed.
17404 static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17405 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17406 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17407 if (!Shuf || !Shuf->getOperand(1).isUndef())
17408 return SDValue();
17409
17410 // Check all elements are used once in the mask.
17411 ArrayRef<int> Mask = Shuf->getMask();
17412 APInt SetElts(Mask.size(), 0);
17413 for (int E : Mask) {
17414 if (E < 0 || E >= (int)Mask.size())
17415 return SDValue();
17416 SetElts.setBit(E);
17417 }
17418 if (!SetElts.isAllOnes())
17419 return SDValue();
17420
17421 if (N->getNumOperands() != VecOp + 1) {
17422 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17423 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17424 return SDValue();
17425 }
17426
17427 SmallVector<SDValue> Ops;
17428 for (SDValue Op : N->ops()) {
17429 if (Op.getValueType().isVector())
17430 Ops.push_back(Op.getOperand(0));
17431 else
17432 Ops.push_back(Op);
17433 }
17434 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17435}
17436
17437 static SDValue PerformVMOVNCombine(SDNode *N,
17438 TargetLowering::DAGCombinerInfo &DCI) {
17439 SDValue Op0 = N->getOperand(0);
17440 SDValue Op1 = N->getOperand(1);
17441 unsigned IsTop = N->getConstantOperandVal(2);
17442
17443 // VMOVNT a undef -> a
17444 // VMOVNB a undef -> a
17445 // VMOVNB undef a -> a
17446 if (Op1->isUndef())
17447 return Op0;
17448 if (Op0->isUndef() && !IsTop)
17449 return Op1;
17450
17451 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17452 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17453 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17454 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17455 Op1->getConstantOperandVal(2) == 0)
17456 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17457 Op0, Op1->getOperand(1), N->getOperand(2));
17458
17459 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17460 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17461 // into the top or bottom lanes.
17462 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17463 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17464 APInt Op0DemandedElts =
17465 IsTop ? Op1DemandedElts
17466 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17467
17468 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17469 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17470 return SDValue(N, 0);
17471 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17472 return SDValue(N, 0);
17473
17474 return SDValue();
17475}
17476
17477 static SDValue PerformVQMOVNCombine(SDNode *N,
17478 TargetLowering::DAGCombinerInfo &DCI) {
17479 SDValue Op0 = N->getOperand(0);
17480 unsigned IsTop = N->getConstantOperandVal(2);
17481
17482 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17483 APInt Op0DemandedElts =
17484 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17485 : APInt::getHighBitsSet(2, 1));
17486
17487 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17488 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17489 return SDValue(N, 0);
17490 return SDValue();
17491}
17492
17493 static SDValue PerformVQDMULHCombine(SDNode *N,
17494 TargetLowering::DAGCombinerInfo &DCI) {
17495 EVT VT = N->getValueType(0);
17496 SDValue LHS = N->getOperand(0);
17497 SDValue RHS = N->getOperand(1);
17498
17499 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17500 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17501 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17502 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17503 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17504 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17505 SDLoc DL(N);
17506 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17507 LHS.getOperand(0), RHS.getOperand(0));
17508 SDValue UndefV = LHS.getOperand(1);
17509 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17510 }
17511 return SDValue();
17512}
17513
17514 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17515 SDLoc DL(N);
17516 SDValue Op0 = N->getOperand(0);
17517 SDValue Op1 = N->getOperand(1);
17518
17519 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17520 // uses of the intrinsics.
17521 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17522 int ShiftAmt = C->getSExtValue();
17523 if (ShiftAmt == 0) {
17524 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17525 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17526 return SDValue();
17527 }
17528
17529 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17530 unsigned NewOpcode =
17531 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17532 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17533 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17534 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17535 return NewShift;
17536 }
17537 }
17538
17539 return SDValue();
17540}
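// e.g. (sketch) an LSLL whose constant shift amount is -4, as can come out of
// the MVE long-shift intrinsics, is rewritten to an LSRL by 4; a shift amount
// of 0 simply forwards the two input halves unchanged.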
17541
17542/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17543 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17544 DAGCombinerInfo &DCI) const {
17545 SelectionDAG &DAG = DCI.DAG;
17546 unsigned IntNo = N->getConstantOperandVal(0);
17547 switch (IntNo) {
17548 default:
17549 // Don't do anything for most intrinsics.
17550 break;
17551
17552 // Vector shifts: check for immediate versions and lower them.
17553 // Note: This is done during DAG combining instead of DAG legalizing because
17554 // the build_vectors for 64-bit vector element shift counts are generally
17555 // not legal, and it is hard to see their values after they get legalized to
17556 // loads from a constant pool.
17557 case Intrinsic::arm_neon_vshifts:
17558 case Intrinsic::arm_neon_vshiftu:
17559 case Intrinsic::arm_neon_vrshifts:
17560 case Intrinsic::arm_neon_vrshiftu:
17561 case Intrinsic::arm_neon_vrshiftn:
17562 case Intrinsic::arm_neon_vqshifts:
17563 case Intrinsic::arm_neon_vqshiftu:
17564 case Intrinsic::arm_neon_vqshiftsu:
17565 case Intrinsic::arm_neon_vqshiftns:
17566 case Intrinsic::arm_neon_vqshiftnu:
17567 case Intrinsic::arm_neon_vqshiftnsu:
17568 case Intrinsic::arm_neon_vqrshiftns:
17569 case Intrinsic::arm_neon_vqrshiftnu:
17570 case Intrinsic::arm_neon_vqrshiftnsu: {
17571 EVT VT = N->getOperand(1).getValueType();
17572 int64_t Cnt;
17573 unsigned VShiftOpc = 0;
17574
17575 switch (IntNo) {
17576 case Intrinsic::arm_neon_vshifts:
17577 case Intrinsic::arm_neon_vshiftu:
17578 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17579 VShiftOpc = ARMISD::VSHLIMM;
17580 break;
17581 }
17582 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17583 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17584 : ARMISD::VSHRuIMM);
17585 break;
17586 }
17587 return SDValue();
17588
17589 case Intrinsic::arm_neon_vrshifts:
17590 case Intrinsic::arm_neon_vrshiftu:
17591 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17592 break;
17593 return SDValue();
17594
17595 case Intrinsic::arm_neon_vqshifts:
17596 case Intrinsic::arm_neon_vqshiftu:
17597 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17598 break;
17599 return SDValue();
17600
17601 case Intrinsic::arm_neon_vqshiftsu:
17602 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17603 break;
17604 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17605
17606 case Intrinsic::arm_neon_vrshiftn:
17607 case Intrinsic::arm_neon_vqshiftns:
17608 case Intrinsic::arm_neon_vqshiftnu:
17609 case Intrinsic::arm_neon_vqshiftnsu:
17610 case Intrinsic::arm_neon_vqrshiftns:
17611 case Intrinsic::arm_neon_vqrshiftnu:
17612 case Intrinsic::arm_neon_vqrshiftnsu:
17613 // Narrowing shifts require an immediate right shift.
17614 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17615 break;
17616 llvm_unreachable("invalid shift count for narrowing vector shift "
17617 "intrinsic");
17618
17619 default:
17620 llvm_unreachable("unhandled vector shift");
17621 }
17622
17623 switch (IntNo) {
17624 case Intrinsic::arm_neon_vshifts:
17625 case Intrinsic::arm_neon_vshiftu:
17626 // Opcode already set above.
17627 break;
17628 case Intrinsic::arm_neon_vrshifts:
17629 VShiftOpc = ARMISD::VRSHRsIMM;
17630 break;
17631 case Intrinsic::arm_neon_vrshiftu:
17632 VShiftOpc = ARMISD::VRSHRuIMM;
17633 break;
17634 case Intrinsic::arm_neon_vrshiftn:
17635 VShiftOpc = ARMISD::VRSHRNIMM;
17636 break;
17637 case Intrinsic::arm_neon_vqshifts:
17638 VShiftOpc = ARMISD::VQSHLsIMM;
17639 break;
17640 case Intrinsic::arm_neon_vqshiftu:
17641 VShiftOpc = ARMISD::VQSHLuIMM;
17642 break;
17643 case Intrinsic::arm_neon_vqshiftsu:
17644 VShiftOpc = ARMISD::VQSHLsuIMM;
17645 break;
17646 case Intrinsic::arm_neon_vqshiftns:
17647 VShiftOpc = ARMISD::VQSHRNsIMM;
17648 break;
17649 case Intrinsic::arm_neon_vqshiftnu:
17650 VShiftOpc = ARMISD::VQSHRNuIMM;
17651 break;
17652 case Intrinsic::arm_neon_vqshiftnsu:
17653 VShiftOpc = ARMISD::VQSHRNsuIMM;
17654 break;
17655 case Intrinsic::arm_neon_vqrshiftns:
17656 VShiftOpc = ARMISD::VQRSHRNsIMM;
17657 break;
17658 case Intrinsic::arm_neon_vqrshiftnu:
17659 VShiftOpc = ARMISD::VQRSHRNuIMM;
17660 break;
17661 case Intrinsic::arm_neon_vqrshiftnsu:
17662 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17663 break;
17664 }
17665
17666 SDLoc dl(N);
17667 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17668 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17669 }
17670
17671 case Intrinsic::arm_neon_vshiftins: {
17672 EVT VT = N->getOperand(1).getValueType();
17673 int64_t Cnt;
17674 unsigned VShiftOpc = 0;
17675
17676 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17677 VShiftOpc = ARMISD::VSLIIMM;
17678 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17679 VShiftOpc = ARMISD::VSRIIMM;
17680 else {
17681 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17682 }
17683
17684 SDLoc dl(N);
17685 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17686 N->getOperand(1), N->getOperand(2),
17687 DAG.getConstant(Cnt, dl, MVT::i32));
17688 }
17689
17690 case Intrinsic::arm_neon_vqrshifts:
17691 case Intrinsic::arm_neon_vqrshiftu:
17692 // No immediate versions of these to check for.
17693 break;
17694
17695 case Intrinsic::arm_neon_vbsl: {
17696 SDLoc dl(N);
17697 return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
17698 N->getOperand(2), N->getOperand(3));
17699 }
17700 case Intrinsic::arm_mve_vqdmlah:
17701 case Intrinsic::arm_mve_vqdmlash:
17702 case Intrinsic::arm_mve_vqrdmlah:
17703 case Intrinsic::arm_mve_vqrdmlash:
17704 case Intrinsic::arm_mve_vmla_n_predicated:
17705 case Intrinsic::arm_mve_vmlas_n_predicated:
17706 case Intrinsic::arm_mve_vqdmlah_predicated:
17707 case Intrinsic::arm_mve_vqdmlash_predicated:
17708 case Intrinsic::arm_mve_vqrdmlah_predicated:
17709 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17710 // These intrinsics all take an i32 scalar operand which is narrowed to the
17711 // size of a single lane of the vector type they return. So we don't need
17712 // any bits of that operand above that point, which allows us to eliminate
17713 // uxth/sxth.
17714 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17715 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17716 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17717 return SDValue();
17718 break;
17719 }
17720
17721 case Intrinsic::arm_mve_minv:
17722 case Intrinsic::arm_mve_maxv:
17723 case Intrinsic::arm_mve_minav:
17724 case Intrinsic::arm_mve_maxav:
17725 case Intrinsic::arm_mve_minv_predicated:
17726 case Intrinsic::arm_mve_maxv_predicated:
17727 case Intrinsic::arm_mve_minav_predicated:
17728 case Intrinsic::arm_mve_maxav_predicated: {
17729 // These intrinsics all take an i32 scalar operand which is narrowed to the
17730 // size of a single lane of the vector type they take as the other input.
17731 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17732 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17733 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17734 return SDValue();
17735 break;
17736 }
17737
17738 case Intrinsic::arm_mve_addv: {
17739 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17740 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17741 bool Unsigned = N->getConstantOperandVal(2);
17742 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17743 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17744 }
17745
17746 case Intrinsic::arm_mve_addlv:
17747 case Intrinsic::arm_mve_addlv_predicated: {
17748 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17749 // which recombines the two outputs into an i64
17750 bool Unsigned = N->getConstantOperandVal(2);
17751 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17752 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17753 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17754
17755 SmallVector<SDValue, 4> Ops;
17756 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17757 if (i != 2) // skip the unsigned flag
17758 Ops.push_back(N->getOperand(i));
17759
17760 SDLoc dl(N);
17761 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17762 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17763 val.getValue(1));
17764 }
17765 }
17766
17767 return SDValue();
17768}
17769
17770/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17771/// lowers them. As with the vector shift intrinsics, this is done during DAG
17772/// combining instead of DAG legalizing because the build_vectors for 64-bit
17773/// vector element shift counts are generally not legal, and it is hard to see
17774/// their values after they get legalized to loads from a constant pool.
17775 static SDValue PerformShiftCombine(SDNode *N,
17776 TargetLowering::DAGCombinerInfo &DCI,
17777 const ARMSubtarget *ST) {
17778 SelectionDAG &DAG = DCI.DAG;
17779 EVT VT = N->getValueType(0);
17780
17781 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17782 N->getOperand(0)->getOpcode() == ISD::AND &&
17783 N->getOperand(0)->hasOneUse()) {
17784 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17785 return SDValue();
17786 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17787 // usually show up because instcombine prefers to canonicalize it to
17788 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17789 // out of GEP lowering in some cases.
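// For example, with AndMask == 0x3ff and ShiftAmt == 2, MaskedBits == 22 and
// ((x & 0x3ff) << 2) is rewritten as ((x << 22) >> 20), using two shifts and
// no separately materialized mask constant.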
17790 SDValue N0 = N->getOperand(0);
17791 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17792 if (!ShiftAmtNode)
17793 return SDValue();
17794 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17795 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17796 if (!AndMaskNode)
17797 return SDValue();
17798 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17799 // Don't transform uxtb/uxth.
17800 if (AndMask == 255 || AndMask == 65535)
17801 return SDValue();
17802 if (isMask_32(AndMask)) {
17803 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17804 if (MaskedBits > ShiftAmt) {
17805 SDLoc DL(N);
17806 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17807 DAG.getConstant(MaskedBits, DL, MVT::i32));
17808 return DAG.getNode(
17809 ISD::SRL, DL, MVT::i32, SHL,
17810 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17811 }
17812 }
17813 }
17814
17815 // Nothing to be done for scalar shifts.
17816 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17817 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17818 return SDValue();
17819 if (ST->hasMVEIntegerOps())
17820 return SDValue();
17821
17822 int64_t Cnt;
17823
17824 switch (N->getOpcode()) {
17825 default: llvm_unreachable("unexpected shift opcode");
17826
17827 case ISD::SHL:
17828 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17829 SDLoc dl(N);
17830 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17831 DAG.getConstant(Cnt, dl, MVT::i32));
17832 }
17833 break;
17834
17835 case ISD::SRA:
17836 case ISD::SRL:
17837 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17838 unsigned VShiftOpc =
17839 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17840 SDLoc dl(N);
17841 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17842 DAG.getConstant(Cnt, dl, MVT::i32));
17843 }
17844 }
17845 return SDValue();
17846}
17847
17848// Look for a sign/zero/fpextend extend of a larger than legal load. This can be
17849// split into multiple extending loads, which are simpler to deal with than an
17850// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17851// to convert the type to an f32.
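// For example, a sext of a v8i8 load to v8i32 becomes two v4i8->v4i32
// sextloads at byte offsets 0 and 4, concatenated back together.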
17852 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17853 SDValue N0 = N->getOperand(0);
17854 if (N0.getOpcode() != ISD::LOAD)
17855 return SDValue();
17856 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17857 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17858 LD->getExtensionType() != ISD::NON_EXTLOAD)
17859 return SDValue();
17860 EVT FromVT = LD->getValueType(0);
17861 EVT ToVT = N->getValueType(0);
17862 if (!ToVT.isVector())
17863 return SDValue();
17865 EVT ToEltVT = ToVT.getVectorElementType();
17866 EVT FromEltVT = FromVT.getVectorElementType();
17867
17868 unsigned NumElements = 0;
17869 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17870 NumElements = 4;
17871 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17872 NumElements = 4;
17873 if (NumElements == 0 ||
17874 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17875 FromVT.getVectorNumElements() % NumElements != 0 ||
17876 !isPowerOf2_32(NumElements))
17877 return SDValue();
17878
17879 LLVMContext &C = *DAG.getContext();
17880 SDLoc DL(LD);
17881 // Details about the old load
17882 SDValue Ch = LD->getChain();
17883 SDValue BasePtr = LD->getBasePtr();
17884 Align Alignment = LD->getOriginalAlign();
17885 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17886 AAMDNodes AAInfo = LD->getAAInfo();
17887
17888 ISD::LoadExtType NewExtType =
17889 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17890 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17891 EVT NewFromVT = EVT::getVectorVT(
17892 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17893 EVT NewToVT = EVT::getVectorVT(
17894 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17895
17896 SmallVector<SDValue, 4> Loads;
17897 SmallVector<SDValue, 4> Chains;
17898 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17899 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17900 SDValue NewPtr =
17901 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17902
17903 SDValue NewLoad =
17904 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17905 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17906 Alignment, MMOFlags, AAInfo);
17907 Loads.push_back(NewLoad);
17908 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17909 }
17910
17911 // Float truncs need to be extended with VCVTB's into their floating point types.
17912 if (FromEltVT == MVT::f16) {
17913 SmallVector<SDValue, 4> Extends;
17914
17915 for (unsigned i = 0; i < Loads.size(); i++) {
17916 SDValue LoadBC =
17917 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17918 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17919 DAG.getConstant(0, DL, MVT::i32));
17920 Extends.push_back(FPExt);
17921 }
17922
17923 Loads = Extends;
17924 }
17925
17926 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17927 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17928 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17929}
17930
17931/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17932/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17933 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17934 const ARMSubtarget *ST) {
17935 SDValue N0 = N->getOperand(0);
17936
17937 // Check for sign- and zero-extensions of vector extract operations of 8- and
17938 // 16-bit vector elements. NEON and MVE support these directly. They are
17939 // handled during DAG combining because type legalization will promote them
17940 // to 32-bit types and it is messy to recognize the operations after that.
17941 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17942 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17943 SDValue Vec = N0.getOperand(0);
17944 SDValue Lane = N0.getOperand(1);
17945 EVT VT = N->getValueType(0);
17946 EVT EltVT = N0.getValueType();
17947 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17948
17949 if (VT == MVT::i32 &&
17950 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17951 TLI.isTypeLegal(Vec.getValueType()) &&
17952 isa<ConstantSDNode>(Lane)) {
17953
17954 unsigned Opc = 0;
17955 switch (N->getOpcode()) {
17956 default: llvm_unreachable("unexpected opcode");
17957 case ISD::SIGN_EXTEND:
17958 Opc = ARMISD::VGETLANEs;
17959 break;
17960 case ISD::ZERO_EXTEND:
17961 case ISD::ANY_EXTEND:
17962 Opc = ARMISD::VGETLANEu;
17963 break;
17964 }
17965 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17966 }
17967 }
17968
17969 if (ST->hasMVEIntegerOps())
17970 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17971 return NewLoad;
17972
17973 return SDValue();
17974}
17975
17976 static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17977 const ARMSubtarget *ST) {
17978 if (ST->hasMVEFloatOps())
17979 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17980 return NewLoad;
17981
17982 return SDValue();
17983}
17984
17985// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17986// constant bounds.
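// For example, smin(smax(x, -128), 127) becomes an SSAT to a signed 8-bit
// range, and smin(smax(x, 0), 255) becomes a USAT to an unsigned 8-bit range.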
17987 static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17988 const ARMSubtarget *Subtarget) {
17989 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17990 !Subtarget->isThumb2())
17991 return SDValue();
17992
17993 EVT VT = Op.getValueType();
17994 SDValue Op0 = Op.getOperand(0);
17995
17996 if (VT != MVT::i32 ||
17997 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17998 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17999 !isa<ConstantSDNode>(Op0.getOperand(1)))
18000 return SDValue();
18001
18002 SDValue Min = Op;
18003 SDValue Max = Op0;
18004 SDValue Input = Op0.getOperand(0);
18005 if (Min.getOpcode() == ISD::SMAX)
18006 std::swap(Min, Max);
18007
18008 APInt MinC = Min.getConstantOperandAPInt(1);
18009 APInt MaxC = Max.getConstantOperandAPInt(1);
18010
18011 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
18012 !(MinC + 1).isPowerOf2())
18013 return SDValue();
18014
18015 SDLoc DL(Op);
18016 if (MinC == ~MaxC)
18017 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
18018 DAG.getConstant(MinC.countr_one(), DL, VT));
18019 if (MaxC == 0)
18020 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
18021 DAG.getConstant(MinC.countr_one(), DL, VT));
18022
18023 return SDValue();
18024}
18025
18026/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
18027/// saturates.
18028 static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
18029 const ARMSubtarget *ST) {
18030 EVT VT = N->getValueType(0);
18031 SDValue N0 = N->getOperand(0);
18032
18033 if (VT == MVT::i32)
18034 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
18035
18036 if (!ST->hasMVEIntegerOps())
18037 return SDValue();
18038
18039 if (SDValue V = PerformVQDMULHCombine(N, DAG))
18040 return V;
18041
18042 if (VT != MVT::v4i32 && VT != MVT::v8i16)
18043 return SDValue();
18044
18045 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
18046 // Check one is a smin and the other is a smax
18047 if (Min->getOpcode() != ISD::SMIN)
18048 std::swap(Min, Max);
18049 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
18050 return false;
18051
18052 APInt SaturateC;
18053 if (VT == MVT::v4i32)
18054 SaturateC = APInt(32, (1 << 15) - 1, true);
18055 else //if (VT == MVT::v8i16)
18056 SaturateC = APInt(16, (1 << 7) - 1, true);
18057
18058 APInt MinC, MaxC;
18059 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18060 MinC != SaturateC)
18061 return false;
18062 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18063 MaxC != ~SaturateC)
18064 return false;
18065 return true;
18066 };
18067
18068 if (IsSignedSaturate(N, N0.getNode())) {
18069 SDLoc DL(N);
18070 MVT ExtVT, HalfVT;
18071 if (VT == MVT::v4i32) {
18072 HalfVT = MVT::v8i16;
18073 ExtVT = MVT::v4i16;
18074 } else { // if (VT == MVT::v8i16)
18075 HalfVT = MVT::v16i8;
18076 ExtVT = MVT::v8i8;
18077 }
18078
18079 // Create a VQMOVNB with undef top lanes, then signed extended into the top
18080 // half. That extend will hopefully be removed if only the bottom bits are
18081 // demanded (through a truncating store, for example).
18082 SDValue VQMOVN =
18083 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18084 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18085 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18086 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18087 DAG.getValueType(ExtVT));
18088 }
18089
18090 auto IsUnsignedSaturate = [&](SDNode *Min) {
18091 // For unsigned, we just need to check for <= 0xffff
18092 if (Min->getOpcode() != ISD::UMIN)
18093 return false;
18094
18095 APInt SaturateC;
18096 if (VT == MVT::v4i32)
18097 SaturateC = APInt(32, (1 << 16) - 1, true);
18098 else //if (VT == MVT::v8i16)
18099 SaturateC = APInt(16, (1 << 8) - 1, true);
18100
18101 APInt MinC;
18102 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18103 MinC != SaturateC)
18104 return false;
18105 return true;
18106 };
18107
18108 if (IsUnsignedSaturate(N)) {
18109 SDLoc DL(N);
18110 MVT HalfVT;
18111 unsigned ExtConst;
18112 if (VT == MVT::v4i32) {
18113 HalfVT = MVT::v8i16;
18114 ExtConst = 0x0000FFFF;
18115 } else { //if (VT == MVT::v8i16)
18116 HalfVT = MVT::v16i8;
18117 ExtConst = 0x00FF;
18118 }
18119
18120 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18121 // an AND. That extend will hopefully be removed if only the bottom bits are
18122 // demanded (through a truncating store, for example).
18123 SDValue VQMOVN =
18124 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18125 DAG.getConstant(0, DL, MVT::i32));
18126 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18127 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18128 DAG.getConstant(ExtConst, DL, VT));
18129 }
18130
18131 return SDValue();
18132}
18133
18134 static const APInt *isPowerOf2Constant(SDValue V) {
18135 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18136 if (!C)
18137 return nullptr;
18138 const APInt *CV = &C->getAPIntValue();
18139 return CV->isPowerOf2() ? CV : nullptr;
18140}
18141
18142 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18143 // If we have a CMOV, OR and AND combination such as:
18144 // if (x & CN)
18145 // y |= CM;
18146 //
18147 // And:
18148 // * CN is a single bit;
18149 // * All bits covered by CM are known zero in y
18150 //
18151 // Then we can convert this into a sequence of BFI instructions. This will
18152 // always be a win if CM is a single bit, will always be no worse than the
18153 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18154 // three bits (due to the extra IT instruction).
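// For example, with CN == 0x4 and CM == 0x30, x is shifted right by 2 so the
// tested bit sits at bit 0, and one BFI per set bit of CM inserts it into
// bits 4 and 5 of y.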
18155
18156 SDValue Op0 = CMOV->getOperand(0);
18157 SDValue Op1 = CMOV->getOperand(1);
18158 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18159 SDValue CmpZ = CMOV->getOperand(3);
18160
18161 // The compare must be against zero.
18162 if (!isNullConstant(CmpZ->getOperand(1)))
18163 return SDValue();
18164
18165 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18166 SDValue And = CmpZ->getOperand(0);
18167 if (And->getOpcode() != ISD::AND)
18168 return SDValue();
18169 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18170 if (!AndC)
18171 return SDValue();
18172 SDValue X = And->getOperand(0);
18173
18174 if (CC == ARMCC::EQ) {
18175 // We're performing an "equal to zero" compare. Swap the operands so we
18176 // canonicalize on a "not equal to zero" compare.
18177 std::swap(Op0, Op1);
18178 } else {
18179 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18180 }
18181
18182 if (Op1->getOpcode() != ISD::OR)
18183 return SDValue();
18184
18185 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18186 if (!OrC)
18187 return SDValue();
18188 SDValue Y = Op1->getOperand(0);
18189
18190 if (Op0 != Y)
18191 return SDValue();
18192
18193 // Now, is it profitable to continue?
18194 APInt OrCI = OrC->getAPIntValue();
18195 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18196 if (OrCI.popcount() > Heuristic)
18197 return SDValue();
18198
18199 // Lastly, can we determine that the bits defined by OrCI
18200 // are zero in Y?
18201 KnownBits Known = DAG.computeKnownBits(Y);
18202 if ((OrCI & Known.Zero) != OrCI)
18203 return SDValue();
18204
18205 // OK, we can do the combine.
18206 SDValue V = Y;
18207 SDLoc dl(X);
18208 EVT VT = X.getValueType();
18209 unsigned BitInX = AndC->logBase2();
18210
18211 if (BitInX != 0) {
18212 // We must shift X first.
18213 X = DAG.getNode(ISD::SRL, dl, VT, X,
18214 DAG.getConstant(BitInX, dl, VT));
18215 }
18216
18217 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18218 BitInY < NumActiveBits; ++BitInY) {
18219 if (OrCI[BitInY] == 0)
18220 continue;
18221 APInt Mask(VT.getSizeInBits(), 0);
18222 Mask.setBit(BitInY);
18223 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18224 // Confusingly, the operand is an *inverted* mask.
18225 DAG.getConstant(~Mask, dl, VT));
18226 }
18227
18228 return V;
18229}
18230
18231// Given N, the value controlling the conditional branch, search for the loop
18232// intrinsic, returning it, along with how the value is used. We need to handle
18233// patterns such as the following:
18234// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18235// (brcond (setcc (loop.decrement), 0, eq), exit)
18236// (brcond (setcc (loop.decrement), 0, ne), header)
18237 static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18238 bool &Negate) {
18239 switch (N->getOpcode()) {
18240 default:
18241 break;
18242 case ISD::XOR: {
18243 if (!isa<ConstantSDNode>(N.getOperand(1)))
18244 return SDValue();
18245 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18246 return SDValue();
18247 Negate = !Negate;
18248 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18249 }
18250 case ISD::SETCC: {
18251 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18252 if (!Const)
18253 return SDValue();
18254 if (Const->isZero())
18255 Imm = 0;
18256 else if (Const->isOne())
18257 Imm = 1;
18258 else
18259 return SDValue();
18260 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18261 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18262 }
18263 case ISD::INTRINSIC_W_CHAIN: {
18264 unsigned IntOp = N.getConstantOperandVal(1);
18265 if (IntOp != Intrinsic::test_start_loop_iterations &&
18266 IntOp != Intrinsic::loop_decrement_reg)
18267 return SDValue();
18268 return N;
18269 }
18270 }
18271 return SDValue();
18272}
18273
18274 static SDValue PerformHWLoopCombine(SDNode *N,
18275 TargetLowering::DAGCombinerInfo &DCI,
18276 const ARMSubtarget *ST) {
18277
18278 // The hwloop intrinsics that we're interested are used for control-flow,
18279 // either for entering or exiting the loop:
18280 // - test.start.loop.iterations will test whether its operand is zero. If it
18281 // is zero, the proceeding branch should not enter the loop.
18282 // - loop.decrement.reg also tests whether its operand is zero. If it is
18283 // zero, the proceeding branch should not branch back to the beginning of
18284 // the loop.
18285 // So here, we need to check how the brcond is using the result of each
18286 // of the intrinsics to ensure that we're branching to the right place at the
18287 // right time.
18288
18289 ISD::CondCode CC = ISD::SETEQ;
18290 SDValue Cond;
18291 int Imm = 1;
18292 bool Negate = false;
18293 SDValue Chain = N->getOperand(0);
18294 SDValue Dest;
18295
18296 if (N->getOpcode() == ISD::BRCOND) {
18297 CC = ISD::SETEQ;
18298 Cond = N->getOperand(1);
18299 Dest = N->getOperand(2);
18300 } else {
18301 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18302 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18303 Cond = N->getOperand(2);
18304 Dest = N->getOperand(4);
18305 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18306 if (!Const->isOne() && !Const->isZero())
18307 return SDValue();
18308 Imm = Const->getZExtValue();
18309 } else
18310 return SDValue();
18311 }
18312
18313 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18314 if (!Int)
18315 return SDValue();
18316
18317 if (Negate)
18318 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18319
18320 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18321 return (CC == ISD::SETEQ && Imm == 0) ||
18322 (CC == ISD::SETNE && Imm == 1) ||
18323 (CC == ISD::SETLT && Imm == 1) ||
18324 (CC == ISD::SETULT && Imm == 1);
18325 };
18326
18327 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18328 return (CC == ISD::SETEQ && Imm == 1) ||
18329 (CC == ISD::SETNE && Imm == 0) ||
18330 (CC == ISD::SETGT && Imm == 0) ||
18331 (CC == ISD::SETUGT && Imm == 0) ||
18332 (CC == ISD::SETGE && Imm == 1) ||
18333 (CC == ISD::SETUGE && Imm == 1);
18334 };
18335
18336 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18337 "unsupported condition");
18338
18339 SDLoc dl(Int);
18340 SelectionDAG &DAG = DCI.DAG;
18341 SDValue Elements = Int.getOperand(2);
18342 unsigned IntOp = Int->getConstantOperandVal(1);
18343 assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) &&
18344 "expected single br user");
18345 SDNode *Br = *N->user_begin();
18346 SDValue OtherTarget = Br->getOperand(1);
18347
18348 // Update the unconditional branch to branch to the given Dest.
18349 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18350 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18351 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18352 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18353 };
18354
18355 if (IntOp == Intrinsic::test_start_loop_iterations) {
18356 SDValue Res;
18357 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18358 // We expect this 'instruction' to branch when the counter is zero.
18359 if (IsTrueIfZero(CC, Imm)) {
18360 SDValue Ops[] = {Chain, Setup, Dest};
18361 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18362 } else {
18363 // The logic is the reverse of what we need for WLS, so find the other
18364 // basic block target: the target of the proceeding br.
18365 UpdateUncondBr(Br, Dest, DAG);
18366
18367 SDValue Ops[] = {Chain, Setup, OtherTarget};
18368 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18369 }
18370 // Update LR count to the new value
18371 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18372 // Update chain
18373 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18374 return Res;
18375 } else {
18376 SDValue Size =
18377 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18378 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18379 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18380 DAG.getVTList(MVT::i32, MVT::Other), Args);
18381 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18382
18383 // We expect this instruction to branch when the count is not zero.
18384 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18385
18386 // Update the unconditional branch to target the loop preheader if we've
18387 // found the condition has been reversed.
18388 if (Target == OtherTarget)
18389 UpdateUncondBr(Br, Dest, DAG);
18390
18391 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18392 SDValue(LoopDec.getNode(), 1), Chain);
18393
18394 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18395 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18396 }
18397 return SDValue();
18398}
18399
18400/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18401SDValue
18402 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18403 SDValue Cmp = N->getOperand(3);
18404 if (Cmp.getOpcode() != ARMISD::CMPZ)
18405 // Only looking at NE cases.
18406 return SDValue();
18407
18408 SDLoc dl(N);
18409 SDValue LHS = Cmp.getOperand(0);
18410 SDValue RHS = Cmp.getOperand(1);
18411 SDValue Chain = N->getOperand(0);
18412 SDValue BB = N->getOperand(1);
18413 SDValue ARMcc = N->getOperand(2);
18414 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18415
18416 // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))
18417 // -> (brcond Chain BB CC Flags)
18418 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18419 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18420 LHS->getOperand(0)->hasOneUse() &&
18421 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18422 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18423 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18424 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
18425 LHS->getOperand(0)->getOperand(2),
18426 LHS->getOperand(0)->getOperand(3));
18427 }
18428
18429 return SDValue();
18430}
18431
18432/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18433SDValue
18434 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18435 SDValue Cmp = N->getOperand(3);
18436 if (Cmp.getOpcode() != ARMISD::CMPZ)
18437 // Only looking at EQ and NE cases.
18438 return SDValue();
18439
18440 EVT VT = N->getValueType(0);
18441 SDLoc dl(N);
18442 SDValue LHS = Cmp.getOperand(0);
18443 SDValue RHS = Cmp.getOperand(1);
18444 SDValue FalseVal = N->getOperand(0);
18445 SDValue TrueVal = N->getOperand(1);
18446 SDValue ARMcc = N->getOperand(2);
18447 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18448
18449 // BFI is only available on V6T2+.
18450 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18451 SDValue R = PerformCMOVToBFICombine(N, DAG);
18452 if (R)
18453 return R;
18454 }
18455
18456 // Simplify
18457 // mov r1, r0
18458 // cmp r1, x
18459 // mov r0, y
18460 // moveq r0, x
18461 // to
18462 // cmp r0, x
18463 // movne r0, y
18464 //
18465 // mov r1, r0
18466 // cmp r1, x
18467 // mov r0, x
18468 // movne r0, y
18469 // to
18470 // cmp r0, x
18471 // movne r0, y
18472 /// FIXME: Turn this into a target neutral optimization?
18473 SDValue Res;
18474 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18475 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
18476 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18477 SDValue ARMcc;
18478 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18479 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
18480 }
18481
18482 // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))
18483 // -> (cmov F T CC Flags)
18484 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18485 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18486 isNullConstant(RHS)) {
18487 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18488 LHS->getOperand(2), LHS->getOperand(3));
18489 }
18490
18491 if (!VT.isInteger())
18492 return SDValue();
18493
18494 // Fold away an unnecessary CMPZ/CMOV
18495 // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18496 // if C1==EQ -> CMOV A, B, C2, D
18497 // if C1==NE -> CMOV A, B, NOT(C2), D
18498 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18499 N->getConstantOperandVal(2) == ARMCC::NE) {
18500 ARMCC::CondCodes Cond;
18501 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
18502 if (N->getConstantOperandVal(2) == ARMCC::NE)
18503 Cond = ARMCC::getOppositeCondition(Cond);
18504 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18505 N->getOperand(1),
18506 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
18507 }
18508 }
18509
18510 // Materialize a boolean comparison for integers so we can avoid branching.
18511 if (isNullConstant(FalseVal)) {
18512 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18513 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18514 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18515 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18516 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18517 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18518 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18519 DAG.getConstant(5, dl, MVT::i32));
18520 } else {
18521 // CMOV 0, 1, ==, (CMPZ x, y) ->
18522 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18523 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18524 //
18525 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18526 // x != y. In other words, a carry C == 1 when x == y, C == 0
18527 // otherwise.
18528 // The final UADDO_CARRY computes
18529 // x - y + (0 - (x - y)) + C == C
18530 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18531 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18532 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18533 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18534 // actually.
18535 SDValue Carry =
18536 DAG.getNode(ISD::SUB, dl, MVT::i32,
18537 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18538 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18539 }
18540 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18541 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18542 // This seems pointless but will allow us to combine it further below.
18543 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18544 SDValue Sub =
18545 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18546 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18547 Sub.getValue(1));
18548 FalseVal = Sub;
18549 }
18550 } else if (isNullConstant(TrueVal)) {
18551 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18552 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18553 // This seems pointless but will allow us to combine it further below
18554 // Note that we change == for != as this is the dual for the case above.
18555 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18556 SDValue Sub =
18557 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18558 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18559 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18560 Sub.getValue(1));
18561 FalseVal = Sub;
18562 }
18563 }
18564
18565 // On Thumb1, the DAG above may be further combined if z is a power of 2
18566 // (z == 2 ^ K).
18567 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18568 // t1 = (USUBO (SUB x, y), 1)
18569 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18570 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18571 //
18572 // This also handles the special case of comparing against zero; it's
18573 // essentially, the same pattern, except there's no SUBC:
18574 // CMOV x, z, !=, (CMPZ x, 0) ->
18575 // t1 = (USUBO x, 1)
18576 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18577 // Result = if K != 0 then (SHL t2:0, K) else t2:0
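// For example, for 'x != y ? 4 : 0', t2:0 evaluates to (x != y) and the final
// SHL by K == 2 produces 4 or 0 without a branch.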
18578 const APInt *TrueConst;
18579 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18580 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18581 FalseVal.getOperand(1) == RHS) ||
18582 (FalseVal == LHS && isNullConstant(RHS))) &&
18583 (TrueConst = isPowerOf2Constant(TrueVal))) {
18584 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18585 unsigned ShiftAmount = TrueConst->logBase2();
18586 if (ShiftAmount)
18587 TrueVal = DAG.getConstant(1, dl, VT);
18588 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18589 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18590 Subc.getValue(1));
18591
18592 if (ShiftAmount)
18593 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18594 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18595 }
18596
18597 if (Res.getNode()) {
18598 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18599 // Capture demanded bits information that would be otherwise lost.
18600 if (Known.Zero == 0xfffffffe)
18601 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18602 DAG.getValueType(MVT::i1));
18603 else if (Known.Zero == 0xffffff00)
18604 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18605 DAG.getValueType(MVT::i8));
18606 else if (Known.Zero == 0xffff0000)
18607 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18608 DAG.getValueType(MVT::i16));
18609 }
18610
18611 return Res;
18612}
18613
18614 static SDValue PerformBITCASTCombine(SDNode *N,
18615 TargetLowering::DAGCombinerInfo &DCI,
18616 const ARMSubtarget *ST) {
18617 SelectionDAG &DAG = DCI.DAG;
18618 SDValue Src = N->getOperand(0);
18619 EVT DstVT = N->getValueType(0);
18620
18621 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18622 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18623 EVT SrcVT = Src.getValueType();
18624 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18625 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18626 }
18627
18628 // We may have a bitcast of something that has already had this bitcast
18629 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18630 if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
18631 Src.getOperand(0).getValueType().getScalarSizeInBits() <=
18632 Src.getValueType().getScalarSizeInBits())
18633 Src = Src.getOperand(0);
18634
18635 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18636 // would be generated is at least the width of the element type.
18637 EVT SrcVT = Src.getValueType();
18638 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18639 Src.getOpcode() == ARMISD::VMVNIMM ||
18640 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18641 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18642 DAG.getDataLayout().isBigEndian())
18643 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18644
18645 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18646 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18647 return R;
18648
18649 return SDValue();
18650}
18651
18652// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18653// node into stack operations after legalizeOps.
18654 static SDValue PerformMVETruncCombine(SDNode *N,
18655 TargetLowering::DAGCombinerInfo &DCI) {
18656 SelectionDAG &DAG = DCI.DAG;
18657 EVT VT = N->getValueType(0);
18658 SDLoc DL(N);
18659
18660 // MVETrunc(Undef, Undef) -> Undef
18661 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18662 return DAG.getUNDEF(VT);
18663
18664 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18665 if (N->getNumOperands() == 2 &&
18666 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18667 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18668 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18669 N->getOperand(0).getOperand(1),
18670 N->getOperand(1).getOperand(0),
18671 N->getOperand(1).getOperand(1));
18672
18673 // MVETrunc(shuffle, shuffle) -> VMOVN
18674 if (N->getNumOperands() == 2 &&
18675 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18676 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18677 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18678 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18679
18680 if (S0->getOperand(0) == S1->getOperand(0) &&
18681 S0->getOperand(1) == S1->getOperand(1)) {
18682 // Construct complete shuffle mask
18683 SmallVector<int, 8> Mask(S0->getMask());
18684 Mask.append(S1->getMask().begin(), S1->getMask().end());
18685
18686 if (isVMOVNTruncMask(Mask, VT, false))
18687 return DAG.getNode(
18688 ARMISD::VMOVN, DL, VT,
18689 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18690 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18691 DAG.getConstant(1, DL, MVT::i32));
18692 if (isVMOVNTruncMask(Mask, VT, true))
18693 return DAG.getNode(
18694 ARMISD::VMOVN, DL, VT,
18695 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18696 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18697 DAG.getConstant(1, DL, MVT::i32));
18698 }
18699 }
18700
18701 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18702 // truncate to a buildvector to allow the generic optimisations to kick in.
18703 if (all_of(N->ops(), [](SDValue Op) {
18704 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18705 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18706 (Op.getOpcode() == ISD::BITCAST &&
18707 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18708 })) {
18709 SmallVector<SDValue, 8> Extracts;
18710 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18711 SDValue O = N->getOperand(Op);
18712 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18713 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18714 DAG.getConstant(i, DL, MVT::i32));
18715 Extracts.push_back(Ext);
18716 }
18717 }
18718 return DAG.getBuildVector(VT, DL, Extracts);
18719 }
18720
18721 // If we are late in the legalization process and nothing has optimised
18722 // the trunc to anything better, lower it to a stack store and reload,
18723 // performing the truncation whilst keeping the lanes in the correct order:
18724 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18725 if (!DCI.isAfterLegalizeDAG())
18726 return SDValue();
18727
18728 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18729 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18730 int NumIns = N->getNumOperands();
18731 assert((NumIns == 2 || NumIns == 4) &&
18732 "Expected 2 or 4 inputs to an MVETrunc");
18733 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18734 if (N->getNumOperands() == 4)
18735 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18736
18737 SmallVector<SDValue> Chains;
18738 for (int I = 0; I < NumIns; I++) {
18739 SDValue Ptr = DAG.getNode(
18740 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18741 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18742 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18743 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18744 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18745 Ptr, MPI, StoreVT, Align(4));
18746 Chains.push_back(Ch);
18747 }
18748
18749 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18750 MachinePointerInfo MPI =
18751 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18752 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18753}
18754
18755// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18757 SelectionDAG &DAG) {
18758 SDValue N0 = N->getOperand(0);
18759 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18760 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18761 return SDValue();
18762
18763 EVT FromVT = LD->getMemoryVT();
18764 EVT ToVT = N->getValueType(0);
18765 if (!ToVT.isVector())
18766 return SDValue();
18767 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18768 EVT ToEltVT = ToVT.getVectorElementType();
18769 EVT FromEltVT = FromVT.getVectorElementType();
18770
18771 unsigned NumElements = 0;
18772 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18773 NumElements = 4;
18774 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18775 NumElements = 8;
18776 assert(NumElements != 0);
18777
18778 ISD::LoadExtType NewExtType =
18779 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18780 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18781 LD->getExtensionType() != ISD::EXTLOAD &&
18782 LD->getExtensionType() != NewExtType)
18783 return SDValue();
18784
18785 LLVMContext &C = *DAG.getContext();
18786 SDLoc DL(LD);
18787 // Details about the old load
18788 SDValue Ch = LD->getChain();
18789 SDValue BasePtr = LD->getBasePtr();
18790 Align Alignment = LD->getOriginalAlign();
18791 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18792 AAMDNodes AAInfo = LD->getAAInfo();
18793
18794 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18795 EVT NewFromVT = EVT::getVectorVT(
18796 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18797 EVT NewToVT = EVT::getVectorVT(
18798 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18799
18800 SmallVector<SDValue, 4> Loads;
18801 SmallVector<SDValue, 4> Chains;
18802 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18803 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18804 SDValue NewPtr =
18805 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18806
18807 SDValue NewLoad =
18808 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18809 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18810 Alignment, MMOFlags, AAInfo);
18811 Loads.push_back(NewLoad);
18812 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18813 }
18814
18815 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18816 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18817 return DAG.getMergeValues(Loads, DL);
18818}
18819
18820 // Perform combines for MVEEXT. If it has not been optimized to anything better
18821// before lowering, it gets converted to stack store and extloads performing the
18822// extend whilst still keeping the same lane ordering.
18823 static SDValue PerformMVEExtCombine(SDNode *N,
18824 TargetLowering::DAGCombinerInfo &DCI) {
18825 SelectionDAG &DAG = DCI.DAG;
18826 EVT VT = N->getValueType(0);
18827 SDLoc DL(N);
18828 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18829 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18830
18831 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18832 *DAG.getContext());
18833 auto Extend = [&](SDValue V) {
18834 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18835 return N->getOpcode() == ARMISD::MVESEXT
18836 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18837 DAG.getValueType(ExtVT))
18838 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18839 };
18840
18841 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18842 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18843 SDValue Ext = Extend(N->getOperand(0));
18844 return DAG.getMergeValues({Ext, Ext}, DL);
18845 }
18846
18847 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18848 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18849 ArrayRef<int> Mask = SVN->getMask();
18850 assert(Mask.size() == 2 * VT.getVectorNumElements());
18851 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18852 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18853 SDValue Op0 = SVN->getOperand(0);
18854 SDValue Op1 = SVN->getOperand(1);
18855
18856 auto CheckInregMask = [&](int Start, int Offset) {
18857 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18858 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18859 return false;
18860 return true;
18861 };
18862 SDValue V0 = SDValue(N, 0);
18863 SDValue V1 = SDValue(N, 1);
18864 if (CheckInregMask(0, 0))
18865 V0 = Extend(Op0);
18866 else if (CheckInregMask(0, 1))
18867 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18868 else if (CheckInregMask(0, Mask.size()))
18869 V0 = Extend(Op1);
18870 else if (CheckInregMask(0, Mask.size() + 1))
18871 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18872
18873 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18874 V1 = Extend(Op1);
18875 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18876 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18877 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18878 V1 = Extend(Op0);
18879 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18880 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18881
18882 if (V0.getNode() != N || V1.getNode() != N)
18883 return DAG.getMergeValues({V0, V1}, DL);
18884 }
18885
18886 // MVEEXT(load) -> extload, extload
18887 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18889 return L;
18890
18891 if (!DCI.isAfterLegalizeDAG())
18892 return SDValue();
18893
18894 // Lower to a stack store and reload:
18895 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18896 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18897 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18898 int NumOuts = N->getNumValues();
18899 assert((NumOuts == 2 || NumOuts == 4) &&
18900 "Expected 2 or 4 outputs to an MVEEXT");
18901 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18902 *DAG.getContext());
18903 if (N->getNumOperands() == 4)
18904 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18905
18906 MachinePointerInfo MPI =
18907 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18908 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18909 StackPtr, MPI, Align(4));
18910
18911 SmallVector<SDValue> Loads;
18912 for (int I = 0; I < NumOuts; I++) {
18913 SDValue Ptr = DAG.getNode(
18914 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18915 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18916 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18917 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18918 SDValue Load = DAG.getExtLoad(
18919 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18920 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18921 Loads.push_back(Load);
18922 }
18923
18924 return DAG.getMergeValues(Loads, DL);
18925}
18926
18927 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18928 DAGCombinerInfo &DCI) const {
18929 switch (N->getOpcode()) {
18930 default: break;
18931 case ISD::SELECT_CC:
18932 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18933 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18934 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18935 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18936 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18937 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18938 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18939 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18940 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18941 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18942 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18943 case ISD::BRCOND:
18944 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18945 case ARMISD::ADDC:
18946 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18947 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18948 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18949 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18950 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18951 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18952 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18953 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18954 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18955 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18956 case ISD::EXTRACT_VECTOR_ELT:
18957 return PerformExtractEltCombine(N, DCI, Subtarget);
18958 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
18959 case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
18960 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
18961 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18962 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18963 case ISD::FP_TO_SINT:
18964 case ISD::FP_TO_UINT:
18965 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18966 case ISD::FADD:
18967 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18968 case ISD::FMUL:
18969 return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);
18970 case ISD::INTRINSIC_WO_CHAIN:
18971 return PerformIntrinsicCombine(N, DCI);
18972 case ISD::SHL:
18973 case ISD::SRA:
18974 case ISD::SRL:
18975 return PerformShiftCombine(N, DCI, Subtarget);
18976 case ISD::SIGN_EXTEND:
18977 case ISD::ZERO_EXTEND:
18978 case ISD::ANY_EXTEND:
18979 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18980 case ISD::FP_EXTEND:
18981 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18982 case ISD::SMIN:
18983 case ISD::UMIN:
18984 case ISD::SMAX:
18985 case ISD::UMAX:
18986 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18987 case ARMISD::CMOV:
18988 return PerformCMOVCombine(N, DCI.DAG);
18989 case ARMISD::BRCOND:
18990 return PerformBRCONDCombine(N, DCI.DAG);
18991 case ARMISD::CMPZ:
18992 return PerformCMPZCombine(N, DCI.DAG);
18993 case ARMISD::CSINC:
18994 case ARMISD::CSINV:
18995 case ARMISD::CSNEG:
18996 return PerformCSETCombine(N, DCI.DAG);
18997 case ISD::LOAD:
18998 return PerformLOADCombine(N, DCI, Subtarget);
18999 case ARMISD::VLD1DUP:
19000 case ARMISD::VLD2DUP:
19001 case ARMISD::VLD3DUP:
19002 case ARMISD::VLD4DUP:
19003 return PerformVLDCombine(N, DCI);
19004 case ARMISD::BUILD_VECTOR:
19005 return PerformARMBUILD_VECTORCombine(N, DCI);
19006 case ISD::BITCAST:
19007 return PerformBITCASTCombine(N, DCI, Subtarget);
19008 case ARMISD::PREDICATE_CAST:
19009 return PerformPREDICATE_CASTCombine(N, DCI);
19010 case ARMISD::VECTOR_REG_CAST:
19011 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
19012 case ARMISD::MVETRUNC:
19013 return PerformMVETruncCombine(N, DCI);
19014 case ARMISD::MVESEXT:
19015 case ARMISD::MVEZEXT:
19016 return PerformMVEExtCombine(N, DCI);
19017 case ARMISD::VCMP:
19018 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
19019 case ISD::VECREDUCE_ADD:
19020 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
19021 case ARMISD::VADDVs:
19022 case ARMISD::VADDVu:
19023 case ARMISD::VADDLVs:
19024 case ARMISD::VADDLVu:
19025 case ARMISD::VADDLVAs:
19026 case ARMISD::VADDLVAu:
19027 case ARMISD::VMLAVs:
19028 case ARMISD::VMLAVu:
19029 case ARMISD::VMLALVs:
19030 case ARMISD::VMLALVu:
19031 case ARMISD::VMLALVAs:
19032 case ARMISD::VMLALVAu:
19033 return PerformReduceShuffleCombine(N, DCI.DAG);
19034 case ARMISD::VMOVN:
19035 return PerformVMOVNCombine(N, DCI);
19036 case ARMISD::VQMOVNs:
19037 case ARMISD::VQMOVNu:
19038 return PerformVQMOVNCombine(N, DCI);
19039 case ARMISD::VQDMULH:
19040 return PerformVQDMULHCombine(N, DCI);
19041 case ARMISD::ASRL:
19042 case ARMISD::LSRL:
19043 case ARMISD::LSLL:
19044 return PerformLongShiftCombine(N, DCI.DAG);
19045 case ARMISD::SMULWB: {
19046 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19047 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19048 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19049 return SDValue();
19050 break;
19051 }
19052 case ARMISD::SMULWT: {
19053 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19054 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19055 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19056 return SDValue();
19057 break;
19058 }
19059 case ARMISD::SMLALBB:
19060 case ARMISD::QADD16b:
19061 case ARMISD::QSUB16b:
19062 case ARMISD::UQADD16b:
19063 case ARMISD::UQSUB16b: {
19064 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19065 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19066 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19067 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19068 return SDValue();
19069 break;
19070 }
19071 case ARMISD::SMLALBT: {
19072 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19073 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19074 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19075 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19076 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19077 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19078 return SDValue();
19079 break;
19080 }
19081 case ARMISD::SMLALTB: {
19082 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19083 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19084 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19085 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19086 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19087 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19088 return SDValue();
19089 break;
19090 }
19091 case ARMISD::SMLALTT: {
19092 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19093 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19094 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19095 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19096 return SDValue();
19097 break;
19098 }
19099 case ARMISD::QADD8b:
19100 case ARMISD::QSUB8b:
19101 case ARMISD::UQADD8b:
19102 case ARMISD::UQSUB8b: {
19103 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19104 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19105 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19106 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19107 return SDValue();
19108 break;
19109 }
19110 case ARMISD::VBSP:
19111 if (N->getOperand(1) == N->getOperand(2))
19112 return N->getOperand(1);
19113 return SDValue();
19114 case ISD::INTRINSIC_VOID:
19115 case ISD::INTRINSIC_W_CHAIN:
19116 switch (N->getConstantOperandVal(1)) {
19117 case Intrinsic::arm_neon_vld1:
19118 case Intrinsic::arm_neon_vld1x2:
19119 case Intrinsic::arm_neon_vld1x3:
19120 case Intrinsic::arm_neon_vld1x4:
19121 case Intrinsic::arm_neon_vld2:
19122 case Intrinsic::arm_neon_vld3:
19123 case Intrinsic::arm_neon_vld4:
19124 case Intrinsic::arm_neon_vld2lane:
19125 case Intrinsic::arm_neon_vld3lane:
19126 case Intrinsic::arm_neon_vld4lane:
19127 case Intrinsic::arm_neon_vld2dup:
19128 case Intrinsic::arm_neon_vld3dup:
19129 case Intrinsic::arm_neon_vld4dup:
19130 case Intrinsic::arm_neon_vst1:
19131 case Intrinsic::arm_neon_vst1x2:
19132 case Intrinsic::arm_neon_vst1x3:
19133 case Intrinsic::arm_neon_vst1x4:
19134 case Intrinsic::arm_neon_vst2:
19135 case Intrinsic::arm_neon_vst3:
19136 case Intrinsic::arm_neon_vst4:
19137 case Intrinsic::arm_neon_vst2lane:
19138 case Intrinsic::arm_neon_vst3lane:
19139 case Intrinsic::arm_neon_vst4lane:
19140 return PerformVLDCombine(N, DCI);
19141 case Intrinsic::arm_mve_vld2q:
19142 case Intrinsic::arm_mve_vld4q:
19143 case Intrinsic::arm_mve_vst2q:
19144 case Intrinsic::arm_mve_vst4q:
19145 return PerformMVEVLDCombine(N, DCI);
19146 default: break;
19147 }
19148 break;
19149 }
19150 return SDValue();
19151}
19152
19153 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19154 EVT VT) const {
19155 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19156}
19157
19158 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19159 Align Alignment,
19160 MachineMemOperand::Flags,
19161 unsigned *Fast) const {
19162 // Depends what it gets converted into if the type is weird.
19163 if (!VT.isSimple())
19164 return false;
19165
19166 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19167 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19168 auto Ty = VT.getSimpleVT().SimpleTy;
19169
19170 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19171 // Unaligned access can use (for example) LDRB, LDRH, LDR
19172 if (AllowsUnaligned) {
19173 if (Fast)
19174 *Fast = Subtarget->hasV7Ops();
19175 return true;
19176 }
19177 }
19178
19179 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19180 // For any little-endian targets with neon, we can support unaligned ld/st
19181 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19182 // A big-endian target may also explicitly support unaligned accesses
19183 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19184 if (Fast)
19185 *Fast = 1;
19186 return true;
19187 }
19188 }
19189
19190 if (!Subtarget->hasMVEIntegerOps())
19191 return false;
19192
19193 // These are for predicates
19194 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19195 Ty == MVT::v2i1)) {
19196 if (Fast)
19197 *Fast = 1;
19198 return true;
19199 }
19200
19201 // These are for truncated stores/narrowing loads. They are fine so long as
19202 // the alignment is at least the size of the item being loaded
19203 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19204 Alignment >= VT.getScalarSizeInBits() / 8) {
19205 if (Fast)
19206 *Fast = true;
19207 return true;
19208 }
19209
19210 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19211 // VSTRW.U32 all store the vector register in exactly the same format, and
19212 // differ only in the range of their immediate offset field and the required
19213 // alignment. So there is always a store that can be used, regardless of
19214 // actual type.
19215 //
19216 // For big endian, that is not the case. But can still emit a (VSTRB.U8;
19217 // VREV64.8) pair and get the same effect. This will likely be better than
19218 // aligning the vector through the stack.
19219 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19220 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19221 Ty == MVT::v2f64) {
19222 if (Fast)
19223 *Fast = 1;
19224 return true;
19225 }
19226
19227 return false;
19228}
19229
19230
19231 EVT ARMTargetLowering::getOptimalMemOpType(
19232 const MemOp &Op, const AttributeList &FuncAttributes) const {
19233 // See if we can use NEON instructions for this...
19234 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19235 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19236 unsigned Fast;
19237 if (Op.size() >= 16 &&
19238 (Op.isAligned(Align(16)) ||
19239 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19240 MachineMemOperand::MONone, &Fast) &&
19241 Fast))) {
19242 return MVT::v2f64;
19243 } else if (Op.size() >= 8 &&
19244 (Op.isAligned(Align(8)) ||
19245 (allowsMisalignedMemoryAccesses(
19246 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19247 Fast))) {
19248 return MVT::f64;
19249 }
19250 }
19251
19252 // Let the target-independent logic figure it out.
19253 return MVT::Other;
19254}
19255
19256// 64-bit integers are split into their high and low parts and held in two
19257// different registers, so the trunc is free since the low register can just
19258// be used.
19259bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19260 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19261 return false;
19262 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19263 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19264 return (SrcBits == 64 && DestBits == 32);
19265}
19266
19267 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19268 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19269 !DstVT.isInteger())
19270 return false;
19271 unsigned SrcBits = SrcVT.getSizeInBits();
19272 unsigned DestBits = DstVT.getSizeInBits();
19273 return (SrcBits == 64 && DestBits == 32);
19274}
19275
19276 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19277 if (Val.getOpcode() != ISD::LOAD)
19278 return false;
19279
19280 EVT VT1 = Val.getValueType();
19281 if (!VT1.isSimple() || !VT1.isInteger() ||
19282 !VT2.isSimple() || !VT2.isInteger())
19283 return false;
19284
19285 switch (VT1.getSimpleVT().SimpleTy) {
19286 default: break;
19287 case MVT::i1:
19288 case MVT::i8:
19289 case MVT::i16:
19290 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19291 return true;
19292 }
19293
19294 return false;
19295}
19296
19297 bool ARMTargetLowering::isFNegFree(EVT VT) const {
19298 if (!VT.isSimple())
19299 return false;
19300
19301 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19302 // negate values directly (fneg is free). So, we don't want to let the DAG
19303 // combiner rewrite fneg into xors and some other instructions. For f16 and
19304 // FullFP16 argument passing, some bitcast nodes may be introduced,
19305 // triggering this DAG combine rewrite, so we are avoiding that with this.
19306 switch (VT.getSimpleVT().SimpleTy) {
19307 default: break;
19308 case MVT::f16:
19309 return Subtarget->hasFullFP16();
19310 }
19311
19312 return false;
19313}
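// For example: with +fullfp16 a bare "fneg half" can be selected as a single
// VNEG.F16, and accumulating forms such as VNMLA/VNMLS fold the negation, so
// rewriting fneg into an integer sign-bit XOR here would only hurt.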
19314
19315Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19316 if (!Subtarget->hasMVEIntegerOps())
19317 return nullptr;
19318 Type *SVIType = SVI->getType();
19319 Type *ScalarType = SVIType->getScalarType();
19320
19321 if (ScalarType->isFloatTy())
19322 return Type::getInt32Ty(SVIType->getContext());
19323 if (ScalarType->isHalfTy())
19324 return Type::getInt16Ty(SVIType->getContext());
19325 return nullptr;
19326}
19327
19328bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19329 EVT VT = ExtVal.getValueType();
19330
19331 if (!isTypeLegal(VT))
19332 return false;
19333
19334 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19335 if (Ld->isExpandingLoad())
19336 return false;
19337 }
19338
19339 if (Subtarget->hasMVEIntegerOps())
19340 return true;
19341
19342 // Don't create a loadext if we can fold the extension into a wide/long
19343 // instruction.
19344 // If there's more than one user instruction, the loadext is desirable no
19345 // matter what. There can be two uses by the same instruction.
19346 if (ExtVal->use_empty() ||
19347 !ExtVal->user_begin()->isOnlyUserOf(ExtVal.getNode()))
19348 return true;
19349
19350 SDNode *U = *ExtVal->user_begin();
19351 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19352 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19353 return false;
19354
19355 return true;
19356}
19357
19358bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19359 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19360 return false;
19361
19362 if (!isTypeLegal(EVT::getEVT(Ty1)))
19363 return false;
19364
19365 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19366
19367 // Assuming the caller doesn't have a zeroext or signext return parameter,
19368 // truncation all the way down to i1 is valid.
19369 return true;
19370}
19371
19372/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19373/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19374/// expanded to FMAs when this method returns true, otherwise fmuladd is
19375/// expanded to fmul + fadd.
19376///
19377/// ARM supports both fused and unfused multiply-add operations; we already
19378/// lower a pair of fmul and fadd to the latter so it's not clear that there
19379/// would be a gain or that the gain would be worthwhile enough to risk
19380/// correctness bugs.
19381///
19382/// For MVE, we set this to true as it helps simplify the need for some
19383/// patterns (and we don't have the non-fused floating point instruction).
19384bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19385 EVT VT) const {
19386 if (Subtarget->useSoftFloat())
19387 return false;
19388
19389 if (!VT.isSimple())
19390 return false;
19391
19392 switch (VT.getSimpleVT().SimpleTy) {
19393 case MVT::v4f32:
19394 case MVT::v8f16:
19395 return Subtarget->hasMVEFloatOps();
19396 case MVT::f16:
19397 return Subtarget->useFPVFMx16();
19398 case MVT::f32:
19399 return Subtarget->useFPVFMx();
19400 case MVT::f64:
19401 return Subtarget->useFPVFMx64();
19402 default:
19403 break;
19404 }
19405
19406 return false;
19407}
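// Illustrative consequence (a sketch, assuming a VFPv4-or-later target): a
// call to llvm.fmuladd.f32 is expanded to an FMA node and can be selected as
// a single VFMA.F32 instead of a VMUL.F32 followed by VADD.F32.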
19408
19409static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19410 if (V < 0)
19411 return false;
19412
19413 unsigned Scale = 1;
19414 switch (VT.getSimpleVT().SimpleTy) {
19415 case MVT::i1:
19416 case MVT::i8:
19417 // Scale == 1;
19418 break;
19419 case MVT::i16:
19420 // Scale == 2;
19421 Scale = 2;
19422 break;
19423 default:
19424 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19425 // Scale == 4;
19426 Scale = 4;
19427 break;
19428 }
19429
19430 if ((V & (Scale - 1)) != 0)
19431 return false;
19432 return isUInt<5>(V / Scale);
19433}
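// Worked example of the scaling above: Thumb1 immediate offsets are a 5-bit
// field scaled by the access size, so legal offsets are 0,4,...,124 for i32
// (LDR), 0,2,...,62 for i16 (LDRH) and 0..31 for i8 (LDRB).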
19434
19435static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19436 const ARMSubtarget *Subtarget) {
19437 if (!VT.isInteger() && !VT.isFloatingPoint())
19438 return false;
19439 if (VT.isVector() && Subtarget->hasNEON())
19440 return false;
19441 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19442 !Subtarget->hasMVEFloatOps())
19443 return false;
19444
19445 bool IsNeg = false;
19446 if (V < 0) {
19447 IsNeg = true;
19448 V = -V;
19449 }
19450
19451 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19452
19453 // MVE: size * imm7
19454 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19455 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19456 case MVT::i32:
19457 case MVT::f32:
19458 return isShiftedUInt<7,2>(V);
19459 case MVT::i16:
19460 case MVT::f16:
19461 return isShiftedUInt<7,1>(V);
19462 case MVT::i8:
19463 return isUInt<7>(V);
19464 default:
19465 return false;
19466 }
19467 }
19468
19469 // half VLDR: 2 * imm8
19470 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19471 return isShiftedUInt<8, 1>(V);
19472 // VLDR and LDRD: 4 * imm8
19473 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19474 return isShiftedUInt<8, 2>(V);
19475
19476 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19477 // + imm12 or - imm8
19478 if (IsNeg)
19479 return isUInt<8>(V);
19480 return isUInt<12>(V);
19481 }
19482
19483 return false;
19484}
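// Worked example: an MVE v4i32 access uses size * imm7, so 508 (= 4 * 127) is
// the largest legal offset and anything not a multiple of 4 is rejected,
// while a plain scalar i32 LDR/STR accepts +0..4095 or -0..-255.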
19485
19486/// isLegalAddressImmediate - Return true if the integer value can be used
19487/// as the offset of the target addressing mode for load / store of the
19488/// given type.
19489static bool isLegalAddressImmediate(int64_t V, EVT VT,
19490 const ARMSubtarget *Subtarget) {
19491 if (V == 0)
19492 return true;
19493
19494 if (!VT.isSimple())
19495 return false;
19496
19497 if (Subtarget->isThumb1Only())
19498 return isLegalT1AddressImmediate(V, VT);
19499 else if (Subtarget->isThumb2())
19500 return isLegalT2AddressImmediate(V, VT, Subtarget);
19501
19502 // ARM mode.
19503 if (V < 0)
19504 V = - V;
19505 switch (VT.getSimpleVT().SimpleTy) {
19506 default: return false;
19507 case MVT::i1:
19508 case MVT::i8:
19509 case MVT::i32:
19510 // +- imm12
19511 return isUInt<12>(V);
19512 case MVT::i16:
19513 // +- imm8
19514 return isUInt<8>(V);
19515 case MVT::f32:
19516 case MVT::f64:
19517 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19518 return false;
19519 return isShiftedUInt<8, 2>(V);
19520 }
19521}
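// Worked example for ARM mode: LDR/STR of i8/i32 take +/-imm12 (up to 4095),
// LDRH/LDRSH take +/-imm8 (up to 255), and VLDR/VSTR take a multiple of 4 up
// to +/-1020, matching the cases above.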
19522
19523bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19524 EVT VT) const {
19525 int Scale = AM.Scale;
19526 if (Scale < 0)
19527 return false;
19528
19529 switch (VT.getSimpleVT().SimpleTy) {
19530 default: return false;
19531 case MVT::i1:
19532 case MVT::i8:
19533 case MVT::i16:
19534 case MVT::i32:
19535 if (Scale == 1)
19536 return true;
19537 // r + r << imm
19538 Scale = Scale & ~1;
19539 return Scale == 2 || Scale == 4 || Scale == 8;
19540 case MVT::i64:
19541 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19542 // version in Thumb mode.
19543 // r + r
19544 if (Scale == 1)
19545 return true;
19546 // r * 2 (this can be lowered to r + r).
19547 if (!AM.HasBaseReg && Scale == 2)
19548 return true;
19549 return false;
19550 case MVT::isVoid:
19551 // Note, we allow "void" uses (basically, uses that aren't loads or
19552 // stores), because arm allows folding a scale into many arithmetic
19553 // operations. This should be made more precise and revisited later.
19554
19555 // Allow r << imm, but the imm has to be a multiple of two.
19556 if (Scale & 1) return false;
19557 return isPowerOf2_32(Scale);
19558 }
19559}
19560
19561bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19562 EVT VT) const {
19563 const int Scale = AM.Scale;
19564
19565 // Negative scales are not supported in Thumb1.
19566 if (Scale < 0)
19567 return false;
19568
19569 // Thumb1 addressing modes do not support register scaling excepting the
19570 // following cases:
19571 // 1. Scale == 1 means no scaling.
19572 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19573 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19574}
19575
19576/// isLegalAddressingMode - Return true if the addressing mode represented
19577/// by AM is legal for this target, for a load/store of the specified type.
19578bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19579 const AddrMode &AM, Type *Ty,
19580 unsigned AS, Instruction *I) const {
19581 EVT VT = getValueType(DL, Ty, true);
19582 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19583 return false;
19584
19585 // Can never fold addr of global into load/store.
19586 if (AM.BaseGV)
19587 return false;
19588
19589 switch (AM.Scale) {
19590 case 0: // no scale reg, must be "r+i" or "r", or "i".
19591 break;
19592 default:
19593 // ARM doesn't support any R+R*scale+imm addr modes.
19594 if (AM.BaseOffs)
19595 return false;
19596
19597 if (!VT.isSimple())
19598 return false;
19599
19600 if (Subtarget->isThumb1Only())
19601 return isLegalT1ScaledAddressingMode(AM, VT);
19602
19603 if (Subtarget->isThumb2())
19604 return isLegalT2ScaledAddressingMode(AM, VT);
19605
19606 int Scale = AM.Scale;
19607 switch (VT.getSimpleVT().SimpleTy) {
19608 default: return false;
19609 case MVT::i1:
19610 case MVT::i8:
19611 case MVT::i32:
19612 if (Scale < 0) Scale = -Scale;
19613 if (Scale == 1)
19614 return true;
19615 // r + r << imm
19616 return isPowerOf2_32(Scale & ~1);
19617 case MVT::i16:
19618 case MVT::i64:
19619 // r +/- r
19620 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19621 return true;
19622 // r * 2 (this can be lowered to r + r).
19623 if (!AM.HasBaseReg && Scale == 2)
19624 return true;
19625 return false;
19626
19627 case MVT::isVoid:
19628 // Note, we allow "void" uses (basically, uses that aren't loads or
19629 // stores), because arm allows folding a scale into many arithmetic
19630 // operations. This should be made more precise and revisited later.
19631
19632 // Allow r << imm, but the imm has to be a multiple of two.
19633 if (Scale & 1) return false;
19634 return isPowerOf2_32(Scale);
19635 }
19636 }
19637 return true;
19638}
19639
19640/// isLegalICmpImmediate - Return true if the specified immediate is legal
19641/// icmp immediate, that is the target has icmp instructions which can compare
19642/// a register against the immediate without having to materialize the
19643/// immediate into a register.
19644bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19645 // Thumb2 and ARM modes can use cmn for negative immediates.
19646 if (!Subtarget->isThumb())
19647 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19648 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19649 if (Subtarget->isThumb2())
19650 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19651 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19652 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19653 return Imm >= 0 && Imm <= 255;
19654}
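// Example: "icmp eq i32 %x, -1" does not need -1 in a register; ARM and
// Thumb2 can select "cmn r0, #1", since CMN compares against the negated
// immediate, which is exactly what the checks above allow.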
19655
19656/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19657/// *or sub* immediate, that is the target has add or sub instructions which can
19658/// add a register with the immediate without having to materialize the
19659/// immediate into a register.
19660bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19661 // Same encoding for add/sub, just flip the sign.
19662 int64_t AbsImm = std::abs(Imm);
19663 if (!Subtarget->isThumb())
19664 return ARM_AM::getSOImmVal(AbsImm) != -1;
19665 if (Subtarget->isThumb2())
19666 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19667 // Thumb1 only has 8-bit unsigned immediate.
19668 return AbsImm >= 0 && AbsImm <= 255;
19669}
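// Example: adding -8 is as cheap as adding 8, because "add r0, r0, #-8" is
// simply emitted as "sub r0, r0, #8"; hence only the absolute value of the
// immediate is tested above.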
19670
19671// Return false to prevent folding
19672// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19673// if the folding leads to worse code.
19674bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19675 SDValue ConstNode) const {
19676 // Let the DAGCombiner decide for vector types and large types.
19677 const EVT VT = AddNode.getValueType();
19678 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19679 return true;
19680
19681 // It is worse if c0 is legal add immediate, while c1*c0 is not
19682 // and has to be composed by at least two instructions.
19683 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19684 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19685 const int64_t C0 = C0Node->getSExtValue();
19686 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19687 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19688 return true;
19689 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19690 return false;
19691
19692 // Default to true and let the DAGCombiner decide.
19693 return true;
19694}
19695
19696static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19697 bool isSEXTLoad, SDValue &Base,
19698 SDValue &Offset, bool &isInc,
19699 SelectionDAG &DAG) {
19700 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19701 return false;
19702
19703 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19704 // AddressingMode 3
19705 Base = Ptr->getOperand(0);
19706 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19707 int RHSC = (int)RHS->getZExtValue();
19708 if (RHSC < 0 && RHSC > -256) {
19709 assert(Ptr->getOpcode() == ISD::ADD);
19710 isInc = false;
19711 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19712 return true;
19713 }
19714 }
19715 isInc = (Ptr->getOpcode() == ISD::ADD);
19716 Offset = Ptr->getOperand(1);
19717 return true;
19718 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19719 // AddressingMode 2
19720 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19721 int RHSC = (int)RHS->getZExtValue();
19722 if (RHSC < 0 && RHSC > -0x1000) {
19723 assert(Ptr->getOpcode() == ISD::ADD);
19724 isInc = false;
19725 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19726 Base = Ptr->getOperand(0);
19727 return true;
19728 }
19729 }
19730
19731 if (Ptr->getOpcode() == ISD::ADD) {
19732 isInc = true;
19733 ARM_AM::ShiftOpc ShOpcVal=
19734 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19735 if (ShOpcVal != ARM_AM::no_shift) {
19736 Base = Ptr->getOperand(1);
19737 Offset = Ptr->getOperand(0);
19738 } else {
19739 Base = Ptr->getOperand(0);
19740 Offset = Ptr->getOperand(1);
19741 }
19742 return true;
19743 }
19744
19745 isInc = (Ptr->getOpcode() == ISD::ADD);
19746 Base = Ptr->getOperand(0);
19747 Offset = Ptr->getOperand(1);
19748 return true;
19749 }
19750
19751 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19752 return false;
19753}
19754
19755static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19756 bool isSEXTLoad, SDValue &Base,
19757 SDValue &Offset, bool &isInc,
19758 SelectionDAG &DAG) {
19759 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19760 return false;
19761
19762 Base = Ptr->getOperand(0);
19763 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19764 int RHSC = (int)RHS->getZExtValue();
19765 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19766 assert(Ptr->getOpcode() == ISD::ADD);
19767 isInc = false;
19768 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19769 return true;
19770 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19771 isInc = Ptr->getOpcode() == ISD::ADD;
19772 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19773 return true;
19774 }
19775 }
19776
19777 return false;
19778}
19779
19780static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19781 bool isSEXTLoad, bool IsMasked, bool isLE,
19782 SDValue &Base, SDValue &Offset,
19783 bool &isInc, SelectionDAG &DAG) {
19784 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19785 return false;
19786 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19787 return false;
19788
19789 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19790 // as opposed to a vldrw.32). This can allow extra addressing modes or
19791 // alignments for what is otherwise an equivalent instruction.
19792 bool CanChangeType = isLE && !IsMasked;
19793
19794 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19795 int RHSC = (int)RHS->getZExtValue();
19796
19797 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19798 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19799 assert(Ptr->getOpcode() == ISD::ADD);
19800 isInc = false;
19801 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19802 return true;
19803 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19804 isInc = Ptr->getOpcode() == ISD::ADD;
19805 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19806 return true;
19807 }
19808 return false;
19809 };
19810
19811 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19812 // (in BE/masked) type.
19813 Base = Ptr->getOperand(0);
19814 if (VT == MVT::v4i16) {
19815 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19816 return true;
19817 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19818 if (IsInRange(RHSC, 0x80, 1))
19819 return true;
19820 } else if (Alignment >= 4 &&
19821 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19822 IsInRange(RHSC, 0x80, 4))
19823 return true;
19824 else if (Alignment >= 2 &&
19825 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19826 IsInRange(RHSC, 0x80, 2))
19827 return true;
19828 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19829 return true;
19830 return false;
19831}
19832
19833/// getPreIndexedAddressParts - returns true by value, base pointer and
19834/// offset pointer and addressing mode by reference if the node's address
19835/// can be legally represented as pre-indexed load / store address.
19836bool
19837ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19838 SDValue &Offset,
19839 ISD::MemIndexedMode &AM,
19840 SelectionDAG &DAG) const {
19841 if (Subtarget->isThumb1Only())
19842 return false;
19843
19844 EVT VT;
19845 SDValue Ptr;
19846 Align Alignment;
19847 bool isSEXTLoad = false;
19848 bool IsMasked = false;
19849 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19850 Ptr = LD->getBasePtr();
19851 VT = LD->getMemoryVT();
19852 Alignment = LD->getAlign();
19853 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19854 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19855 Ptr = ST->getBasePtr();
19856 VT = ST->getMemoryVT();
19857 Alignment = ST->getAlign();
19858 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19859 Ptr = LD->getBasePtr();
19860 VT = LD->getMemoryVT();
19861 Alignment = LD->getAlign();
19862 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19863 IsMasked = true;
19864 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19865 Ptr = ST->getBasePtr();
19866 VT = ST->getMemoryVT();
19867 Alignment = ST->getAlign();
19868 IsMasked = true;
19869 } else
19870 return false;
19871
19872 bool isInc;
19873 bool isLegal = false;
19874 if (VT.isVector())
19875 isLegal = Subtarget->hasMVEIntegerOps() &&
19876 getMVEIndexedAddressParts(
19877 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19878 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19879 else {
19880 if (Subtarget->isThumb2())
19881 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19882 Offset, isInc, DAG);
19883 else
19884 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19885 Offset, isInc, DAG);
19886 }
19887 if (!isLegal)
19888 return false;
19889
19890 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19891 return true;
19892}
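// Sketch of what a successful match buys: a load of *(p + 4) followed by
// "p += 4" folds into a single pre-indexed "ldr r0, [r1, #4]!", which also
// writes the updated address back into the base register.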
19893
19894/// getPostIndexedAddressParts - returns true by value, base pointer and
19895/// offset pointer and addressing mode by reference if this node can be
19896/// combined with a load / store to form a post-indexed load / store.
19897bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19898 SDValue &Base,
19899 SDValue &Offset,
19900 ISD::MemIndexedMode &AM,
19901 SelectionDAG &DAG) const {
19902 EVT VT;
19903 SDValue Ptr;
19904 Align Alignment;
19905 bool isSEXTLoad = false, isNonExt;
19906 bool IsMasked = false;
19907 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19908 VT = LD->getMemoryVT();
19909 Ptr = LD->getBasePtr();
19910 Alignment = LD->getAlign();
19911 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19912 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19913 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19914 VT = ST->getMemoryVT();
19915 Ptr = ST->getBasePtr();
19916 Alignment = ST->getAlign();
19917 isNonExt = !ST->isTruncatingStore();
19918 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19919 VT = LD->getMemoryVT();
19920 Ptr = LD->getBasePtr();
19921 Alignment = LD->getAlign();
19922 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19923 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19924 IsMasked = true;
19925 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19926 VT = ST->getMemoryVT();
19927 Ptr = ST->getBasePtr();
19928 Alignment = ST->getAlign();
19929 isNonExt = !ST->isTruncatingStore();
19930 IsMasked = true;
19931 } else
19932 return false;
19933
19934 if (Subtarget->isThumb1Only()) {
19935 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19936 // must be non-extending/truncating, i32, with an offset of 4.
19937 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19938 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19939 return false;
19940 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
19941 if (!RHS || RHS->getZExtValue() != 4)
19942 return false;
19943 if (Alignment < Align(4))
19944 return false;
19945
19946 Offset = Op->getOperand(1);
19947 Base = Op->getOperand(0);
19948 AM = ISD::POST_INC;
19949 return true;
19950 }
19951
19952 bool isInc;
19953 bool isLegal = false;
19954 if (VT.isVector())
19955 isLegal = Subtarget->hasMVEIntegerOps() &&
19956 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
19957 Subtarget->isLittle(), Base, Offset,
19958 isInc, DAG);
19959 else {
19960 if (Subtarget->isThumb2())
19961 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19962 isInc, DAG);
19963 else
19964 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19965 isInc, DAG);
19966 }
19967 if (!isLegal)
19968 return false;
19969
19970 if (Ptr != Base) {
19971 // Swap base ptr and offset to catch more post-index load / store when
19972 // it's legal. In Thumb2 mode, offset must be an immediate.
19973 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
19974 !Subtarget->isThumb2())
19975 std::swap(Base, Offset);
19976
19977 // Post-indexed load / store update the base pointer.
19978 if (Ptr != Base)
19979 return false;
19980 }
19981
19982 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
19983 return true;
19984}
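// Likewise for the post-indexed form: a load of *p followed by "p += 4"
// becomes "ldr r0, [r1], #4", and an MVE vector load can use e.g.
// "vldrw.u32 q0, [r1], #16" when the offset fits the scaled imm7 range.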
19985
19986void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
19987 KnownBits &Known,
19988 const APInt &DemandedElts,
19989 const SelectionDAG &DAG,
19990 unsigned Depth) const {
19991 unsigned BitWidth = Known.getBitWidth();
19992 Known.resetAll();
19993 switch (Op.getOpcode()) {
19994 default: break;
19995 case ARMISD::ADDC:
19996 case ARMISD::ADDE:
19997 case ARMISD::SUBC:
19998 case ARMISD::SUBE:
19999 // Special cases when we convert a carry to a boolean.
20000 if (Op.getResNo() == 0) {
20001 SDValue LHS = Op.getOperand(0);
20002 SDValue RHS = Op.getOperand(1);
20003 // (ADDE 0, 0, C) will give us a single bit.
20004 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20005 isNullConstant(RHS)) {
20006 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20007 return;
20008 }
20009 }
20010 break;
20011 case ARMISD::CMOV: {
20012 // Bits are known zero/one if known on the LHS and RHS.
20013 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20014 if (Known.isUnknown())
20015 return;
20016
20017 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20018 Known = Known.intersectWith(KnownRHS);
20019 return;
20020 }
20021 case ISD::INTRINSIC_W_CHAIN: {
20022 Intrinsic::ID IntID =
20023 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20024 switch (IntID) {
20025 default: return;
20026 case Intrinsic::arm_ldaex:
20027 case Intrinsic::arm_ldrex: {
20028 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20029 unsigned MemBits = VT.getScalarSizeInBits();
20030 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20031 return;
20032 }
20033 }
20034 }
20035 case ARMISD::BFI: {
20036 // Conservatively, we can recurse down the first operand
20037 // and just mask out all affected bits.
20038 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20039
20040 // The operand to BFI is already a mask suitable for removing the bits it
20041 // sets.
20042 const APInt &Mask = Op.getConstantOperandAPInt(2);
20043 Known.Zero &= Mask;
20044 Known.One &= Mask;
20045 return;
20046 }
20047 case ARMISD::VGETLANEs:
20048 case ARMISD::VGETLANEu: {
20049 const SDValue &SrcSV = Op.getOperand(0);
20050 EVT VecVT = SrcSV.getValueType();
20051 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20052 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20053 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20054 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20055 "VGETLANE index out of bounds");
20056 unsigned Idx = Pos->getZExtValue();
20057 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20058 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20059
20060 EVT VT = Op.getValueType();
20061 const unsigned DstSz = VT.getScalarSizeInBits();
20062 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20063 (void)SrcSz;
20064 assert(SrcSz == Known.getBitWidth());
20065 assert(DstSz > SrcSz);
20066 if (Op.getOpcode() == ARMISD::VGETLANEs)
20067 Known = Known.sext(DstSz);
20068 else {
20069 Known = Known.zext(DstSz);
20070 }
20071 assert(DstSz == Known.getBitWidth());
20072 break;
20073 }
20074 case ARMISD::VMOVrh: {
20075 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20076 assert(KnownOp.getBitWidth() == 16);
20077 Known = KnownOp.zext(32);
20078 break;
20079 }
20080 case ARMISD::CSINC:
20081 case ARMISD::CSINV:
20082 case ARMISD::CSNEG: {
20083 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20084 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20085
20086 // The result is either:
20087 // CSINC: KnownOp0 or KnownOp1 + 1
20088 // CSINV: KnownOp0 or ~KnownOp1
20089 // CSNEG: KnownOp0 or KnownOp1 * -1
20090 if (Op.getOpcode() == ARMISD::CSINC)
20091 KnownOp1 =
20092 KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
20093 else if (Op.getOpcode() == ARMISD::CSINV)
20094 std::swap(KnownOp1.Zero, KnownOp1.One);
20095 else if (Op.getOpcode() == ARMISD::CSNEG)
20096 KnownOp1 = KnownBits::mul(KnownOp1,
20097 KnownBits::makeConstant(APInt(32, -1)));
20098
20099 Known = KnownOp0.intersectWith(KnownOp1);
20100 break;
20101 }
20102 }
20103}
20104
20105bool ARMTargetLowering::targetShrinkDemandedConstant(
20106 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20107 TargetLoweringOpt &TLO) const {
20108 // Delay optimization, so we don't have to deal with illegal types, or block
20109 // optimizations.
20110 if (!TLO.LegalOps)
20111 return false;
20112
20113 // Only optimize AND for now.
20114 if (Op.getOpcode() != ISD::AND)
20115 return false;
20116
20117 EVT VT = Op.getValueType();
20118
20119 // Ignore vectors.
20120 if (VT.isVector())
20121 return false;
20122
20123 assert(VT == MVT::i32 && "Unexpected integer type");
20124
20125 // Make sure the RHS really is a constant.
20126 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20127 if (!C)
20128 return false;
20129
20130 unsigned Mask = C->getZExtValue();
20131
20132 unsigned Demanded = DemandedBits.getZExtValue();
20133 unsigned ShrunkMask = Mask & Demanded;
20134 unsigned ExpandedMask = Mask | ~Demanded;
20135
20136 // If the mask is all zeros, let the target-independent code replace the
20137 // result with zero.
20138 if (ShrunkMask == 0)
20139 return false;
20140
20141 // If the mask is all ones, erase the AND. (Currently, the target-independent
20142 // code won't do this, so we have to do it explicitly to avoid an infinite
20143 // loop in obscure cases.)
20144 if (ExpandedMask == ~0U)
20145 return TLO.CombineTo(Op, Op.getOperand(0));
20146
20147 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20148 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20149 };
20150 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20151 if (NewMask == Mask)
20152 return true;
20153 SDLoc DL(Op);
20154 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20155 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20156 return TLO.CombineTo(Op, NewOp);
20157 };
20158
20159 // Prefer uxtb mask.
20160 if (IsLegalMask(0xFF))
20161 return UseMask(0xFF);
20162
20163 // Prefer uxth mask.
20164 if (IsLegalMask(0xFFFF))
20165 return UseMask(0xFFFF);
20166
20167 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20168 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20169 if (ShrunkMask < 256)
20170 return UseMask(ShrunkMask);
20171
20172 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20173 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20174 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20175 return UseMask(ExpandedMask);
20176
20177 // Potential improvements:
20178 //
20179 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20180 // We could try to prefer Thumb1 immediates which can be lowered to a
20181 // two-instruction sequence.
20182 // We could try to recognize more legal ARM/Thumb2 immediates here.
20183
20184 return false;
20185}
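// Worked example: if only the low 8 bits of (and X, 0x1FF) are demanded, the
// mask is shrunk to 0xFF, so the AND can be selected as a single UXTB instead
// of materializing 0x1FF into a register first.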
20186
20187bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20188 SDValue Op, const APInt &OriginalDemandedBits,
20189 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20190 unsigned Depth) const {
20191 unsigned Opc = Op.getOpcode();
20192
20193 switch (Opc) {
20194 case ARMISD::ASRL:
20195 case ARMISD::LSRL: {
20196 // If this is result 0 and the other result is unused, see if the demand
20197 // bits allow us to shrink this long shift into a standard small shift in
20198 // the opposite direction.
20199 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20200 isa<ConstantSDNode>(Op->getOperand(2))) {
20201 unsigned ShAmt = Op->getConstantOperandVal(2);
20202 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20203 << (32 - ShAmt)))
20204 return TLO.CombineTo(
20205 Op, TLO.DAG.getNode(
20206 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20207 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20208 }
20209 break;
20210 }
20211 case ARMISD::VBICIMM: {
20212 SDValue Op0 = Op.getOperand(0);
20213 unsigned ModImm = Op.getConstantOperandVal(1);
20214 unsigned EltBits = 0;
20215 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20216 if ((OriginalDemandedBits & Mask) == 0)
20217 return TLO.CombineTo(Op, Op0);
20218 }
20219 }
20220
20221 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20222 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20223}
20224
20225//===----------------------------------------------------------------------===//
20226// ARM Inline Assembly Support
20227//===----------------------------------------------------------------------===//
20228
20229bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20230 // Looking for "rev" which is V6+.
20231 if (!Subtarget->hasV6Ops())
20232 return false;
20233
20234 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20235 StringRef AsmStr = IA->getAsmString();
20236 SmallVector<StringRef, 4> AsmPieces;
20237 SplitString(AsmStr, AsmPieces, ";\n");
20238
20239 switch (AsmPieces.size()) {
20240 default: return false;
20241 case 1:
20242 AsmStr = AsmPieces[0];
20243 AsmPieces.clear();
20244 SplitString(AsmStr, AsmPieces, " \t,");
20245
20246 // rev $0, $1
20247 if (AsmPieces.size() == 3 &&
20248 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20249 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20250 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20251 if (Ty && Ty->getBitWidth() == 32)
20252 return IntrinsicLowering::LowerToByteSwap(CI);
20253 }
20254 break;
20255 }
20256
20257 return false;
20258}
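// Example: on an ARMv6+ target, asm("rev $0, $1" : "=l"(y) : "l"(x)) with a
// 32-bit integer type is recognized above and replaced by a byte-swap
// intrinsic, so later optimizations can see through the inline assembly.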
20259
20260const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20261 // At this point, we have to lower this constraint to something else, so we
20262 // lower it to an "r" or "w". However, by doing this we will force the result
20263 // to be in register, while the X constraint is much more permissive.
20264 //
20265 // Although we are correct (we are free to emit anything, without
20266 // constraints), we might break use cases that would expect us to be more
20267 // efficient and emit something else.
20268 if (!Subtarget->hasVFP2Base())
20269 return "r";
20270 if (ConstraintVT.isFloatingPoint())
20271 return "w";
20272 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20273 (ConstraintVT.getSizeInBits() == 64 ||
20274 ConstraintVT.getSizeInBits() == 128))
20275 return "w";
20276
20277 return "r";
20278}
20279
20280/// getConstraintType - Given a constraint letter, return the type of
20281/// constraint it is for this target.
20282ARMTargetLowering::ConstraintType
20283ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20284 unsigned S = Constraint.size();
20285 if (S == 1) {
20286 switch (Constraint[0]) {
20287 default: break;
20288 case 'l': return C_RegisterClass;
20289 case 'w': return C_RegisterClass;
20290 case 'h': return C_RegisterClass;
20291 case 'x': return C_RegisterClass;
20292 case 't': return C_RegisterClass;
20293 case 'j': return C_Immediate; // Constant for movw.
20294 // An address with a single base register. Due to the way we
20295 // currently handle addresses it is the same as an 'r' memory constraint.
20296 case 'Q': return C_Memory;
20297 }
20298 } else if (S == 2) {
20299 switch (Constraint[0]) {
20300 default: break;
20301 case 'T': return C_RegisterClass;
20302 // All 'U+' constraints are addresses.
20303 case 'U': return C_Memory;
20304 }
20305 }
20306 return TargetLowering::getConstraintType(Constraint);
20307}
20308
20309/// Examine constraint type and operand type and determine a weight value.
20310/// This object must already have been set up with the operand type
20311/// and the current alternative constraint selected.
20312TargetLowering::ConstraintWeight
20313ARMTargetLowering::getSingleConstraintMatchWeight(
20314 AsmOperandInfo &info, const char *constraint) const {
20315 ConstraintWeight weight = CW_Invalid;
20316 Value *CallOperandVal = info.CallOperandVal;
20317 // If we don't have a value, we can't do a match,
20318 // but allow it at the lowest weight.
20319 if (!CallOperandVal)
20320 return CW_Default;
20321 Type *type = CallOperandVal->getType();
20322 // Look at the constraint type.
20323 switch (*constraint) {
20324 default:
20325 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20326 break;
20327 case 'l':
20328 if (type->isIntegerTy()) {
20329 if (Subtarget->isThumb())
20330 weight = CW_SpecificReg;
20331 else
20332 weight = CW_Register;
20333 }
20334 break;
20335 case 'w':
20336 if (type->isFloatingPointTy())
20337 weight = CW_Register;
20338 break;
20339 }
20340 return weight;
20341}
20342
20343using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20344
20345RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20346 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20347 switch (Constraint.size()) {
20348 case 1:
20349 // GCC ARM Constraint Letters
20350 switch (Constraint[0]) {
20351 case 'l': // Low regs or general regs.
20352 if (Subtarget->isThumb())
20353 return RCPair(0U, &ARM::tGPRRegClass);
20354 return RCPair(0U, &ARM::GPRRegClass);
20355 case 'h': // High regs or no regs.
20356 if (Subtarget->isThumb())
20357 return RCPair(0U, &ARM::hGPRRegClass);
20358 break;
20359 case 'r':
20360 if (Subtarget->isThumb1Only())
20361 return RCPair(0U, &ARM::tGPRRegClass);
20362 return RCPair(0U, &ARM::GPRRegClass);
20363 case 'w':
20364 if (VT == MVT::Other)
20365 break;
20366 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20367 return RCPair(0U, &ARM::SPRRegClass);
20368 if (VT.getSizeInBits() == 64)
20369 return RCPair(0U, &ARM::DPRRegClass);
20370 if (VT.getSizeInBits() == 128)
20371 return RCPair(0U, &ARM::QPRRegClass);
20372 break;
20373 case 'x':
20374 if (VT == MVT::Other)
20375 break;
20376 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20377 return RCPair(0U, &ARM::SPR_8RegClass);
20378 if (VT.getSizeInBits() == 64)
20379 return RCPair(0U, &ARM::DPR_8RegClass);
20380 if (VT.getSizeInBits() == 128)
20381 return RCPair(0U, &ARM::QPR_8RegClass);
20382 break;
20383 case 't':
20384 if (VT == MVT::Other)
20385 break;
20386 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20387 return RCPair(0U, &ARM::SPRRegClass);
20388 if (VT.getSizeInBits() == 64)
20389 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20390 if (VT.getSizeInBits() == 128)
20391 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20392 break;
20393 }
20394 break;
20395
20396 case 2:
20397 if (Constraint[0] == 'T') {
20398 switch (Constraint[1]) {
20399 default:
20400 break;
20401 case 'e':
20402 return RCPair(0U, &ARM::tGPREvenRegClass);
20403 case 'o':
20404 return RCPair(0U, &ARM::tGPROddRegClass);
20405 }
20406 }
20407 break;
20408
20409 default:
20410 break;
20411 }
20412
20413 if (StringRef("{cc}").equals_insensitive(Constraint))
20414 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20415
20416 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20417}
20418
20419/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20420/// vector. If it is invalid, don't add anything to Ops.
20421void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20422 StringRef Constraint,
20423 std::vector<SDValue> &Ops,
20424 SelectionDAG &DAG) const {
20425 SDValue Result;
20426
20427 // Currently only support length 1 constraints.
20428 if (Constraint.size() != 1)
20429 return;
20430
20431 char ConstraintLetter = Constraint[0];
20432 switch (ConstraintLetter) {
20433 default: break;
20434 case 'j':
20435 case 'I': case 'J': case 'K': case 'L':
20436 case 'M': case 'N': case 'O':
20437 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20438 if (!C)
20439 return;
20440
20441 int64_t CVal64 = C->getSExtValue();
20442 int CVal = (int) CVal64;
20443 // None of these constraints allow values larger than 32 bits. Check
20444 // that the value fits in an int.
20445 if (CVal != CVal64)
20446 return;
20447
20448 switch (ConstraintLetter) {
20449 case 'j':
20450 // Constant suitable for movw, must be between 0 and
20451 // 65535.
20452 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20453 if (CVal >= 0 && CVal <= 65535)
20454 break;
20455 return;
20456 case 'I':
20457 if (Subtarget->isThumb1Only()) {
20458 // This must be a constant between 0 and 255, for ADD
20459 // immediates.
20460 if (CVal >= 0 && CVal <= 255)
20461 break;
20462 } else if (Subtarget->isThumb2()) {
20463 // A constant that can be used as an immediate value in a
20464 // data-processing instruction.
20465 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20466 break;
20467 } else {
20468 // A constant that can be used as an immediate value in a
20469 // data-processing instruction.
20470 if (ARM_AM::getSOImmVal(CVal) != -1)
20471 break;
20472 }
20473 return;
20474
20475 case 'J':
20476 if (Subtarget->isThumb1Only()) {
20477 // This must be a constant between -255 and -1, for negated ADD
20478 // immediates. This can be used in GCC with an "n" modifier that
20479 // prints the negated value, for use with SUB instructions. It is
20480 // not useful otherwise but is implemented for compatibility.
20481 if (CVal >= -255 && CVal <= -1)
20482 break;
20483 } else {
20484 // This must be a constant between -4095 and 4095. It is not clear
20485 // what this constraint is intended for. Implemented for
20486 // compatibility with GCC.
20487 if (CVal >= -4095 && CVal <= 4095)
20488 break;
20489 }
20490 return;
20491
20492 case 'K':
20493 if (Subtarget->isThumb1Only()) {
20494 // A 32-bit value where only one byte has a nonzero value. Exclude
20495 // zero to match GCC. This constraint is used by GCC internally for
20496 // constants that can be loaded with a move/shift combination.
20497 // It is not useful otherwise but is implemented for compatibility.
20498 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20499 break;
20500 } else if (Subtarget->isThumb2()) {
20501 // A constant whose bitwise inverse can be used as an immediate
20502 // value in a data-processing instruction. This can be used in GCC
20503 // with a "B" modifier that prints the inverted value, for use with
20504 // BIC and MVN instructions. It is not useful otherwise but is
20505 // implemented for compatibility.
20506 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20507 break;
20508 } else {
20509 // A constant whose bitwise inverse can be used as an immediate
20510 // value in a data-processing instruction. This can be used in GCC
20511 // with a "B" modifier that prints the inverted value, for use with
20512 // BIC and MVN instructions. It is not useful otherwise but is
20513 // implemented for compatibility.
20514 if (ARM_AM::getSOImmVal(~CVal) != -1)
20515 break;
20516 }
20517 return;
20518
20519 case 'L':
20520 if (Subtarget->isThumb1Only()) {
20521 // This must be a constant between -7 and 7,
20522 // for 3-operand ADD/SUB immediate instructions.
20523 if (CVal >= -7 && CVal < 7)
20524 break;
20525 } else if (Subtarget->isThumb2()) {
20526 // A constant whose negation can be used as an immediate value in a
20527 // data-processing instruction. This can be used in GCC with an "n"
20528 // modifier that prints the negated value, for use with SUB
20529 // instructions. It is not useful otherwise but is implemented for
20530 // compatibility.
20531 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20532 break;
20533 } else {
20534 // A constant whose negation can be used as an immediate value in a
20535 // data-processing instruction. This can be used in GCC with an "n"
20536 // modifier that prints the negated value, for use with SUB
20537 // instructions. It is not useful otherwise but is implemented for
20538 // compatibility.
20539 if (ARM_AM::getSOImmVal(-CVal) != -1)
20540 break;
20541 }
20542 return;
20543
20544 case 'M':
20545 if (Subtarget->isThumb1Only()) {
20546 // This must be a multiple of 4 between 0 and 1020, for
20547 // ADD sp + immediate.
20548 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20549 break;
20550 } else {
20551 // A power of two or a constant between 0 and 32. This is used in
20552 // GCC for the shift amount on shifted register operands, but it is
20553 // useful in general for any shift amounts.
20554 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20555 break;
20556 }
20557 return;
20558
20559 case 'N':
20560 if (Subtarget->isThumb1Only()) {
20561 // This must be a constant between 0 and 31, for shift amounts.
20562 if (CVal >= 0 && CVal <= 31)
20563 break;
20564 }
20565 return;
20566
20567 case 'O':
20568 if (Subtarget->isThumb1Only()) {
20569 // This must be a multiple of 4 between -508 and 508, for
20570 // ADD/SUB sp = sp + immediate.
20571 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20572 break;
20573 }
20574 return;
20575 }
20576 Result = DAG.getSignedTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20577 break;
20578 }
20579
20580 if (Result.getNode()) {
20581 Ops.push_back(Result);
20582 return;
20583 }
20584 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20585}
20586
20587static RTLIB::Libcall getDivRemLibcall(
20588 const SDNode *N, MVT::SimpleValueType SVT) {
20589 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20590 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20591 "Unhandled Opcode in getDivRemLibcall");
20592 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20593 N->getOpcode() == ISD::SREM;
20594 RTLIB::Libcall LC;
20595 switch (SVT) {
20596 default: llvm_unreachable("Unexpected request for libcall!");
20597 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20598 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20599 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20600 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20601 }
20602 return LC;
20603}
20604
20605static TargetLowering::ArgListTy getDivRemArgList(
20606 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20607 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20608 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20609 "Unhandled Opcode in getDivRemArgList");
20610 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20611 N->getOpcode() == ISD::SREM;
20612 TargetLowering::ArgListTy Args;
20613 TargetLowering::ArgListEntry Entry;
20614 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20615 EVT ArgVT = N->getOperand(i).getValueType();
20616 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20617 Entry.Node = N->getOperand(i);
20618 Entry.Ty = ArgTy;
20619 Entry.IsSExt = isSigned;
20620 Entry.IsZExt = !isSigned;
20621 Args.push_back(Entry);
20622 }
20623 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20624 std::swap(Args[0], Args[1]);
20625 return Args;
20626}
20627
20628SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20629 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20630 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20631 Subtarget->isTargetWindows()) &&
20632 "Register-based DivRem lowering only");
20633 unsigned Opcode = Op->getOpcode();
20634 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20635 "Invalid opcode for Div/Rem lowering");
20636 bool isSigned = (Opcode == ISD::SDIVREM);
20637 EVT VT = Op->getValueType(0);
20638 SDLoc dl(Op);
20639
20640 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20642 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20643 SDValue Res0 =
20644 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20645 SDValue Res1 =
20646 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20647 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20648 {Res0, Res1});
20649 }
20650 }
20651
20652 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20653
20654 // If the target has hardware divide, use divide + multiply + subtract:
20655 // div = a / b
20656 // rem = a - b * div
20657 // return {div, rem}
20658 // This should be lowered into UDIV/SDIV + MLS later on.
20659 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20660 : Subtarget->hasDivideInARMMode();
20661 if (hasDivide && Op->getValueType(0).isSimple() &&
20662 Op->getSimpleValueType(0) == MVT::i32) {
20663 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20664 const SDValue Dividend = Op->getOperand(0);
20665 const SDValue Divisor = Op->getOperand(1);
20666 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20667 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20668 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20669
20670 SDValue Values[2] = {Div, Rem};
20671 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20672 }
20673
20674 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20675 VT.getSimpleVT().SimpleTy);
20676 SDValue InChain = DAG.getEntryNode();
20677
20679 DAG.getContext(),
20680 Subtarget);
20681
20684
20685 Type *RetTy = StructType::get(Ty, Ty);
20686
20687 if (Subtarget->isTargetWindows())
20688 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20689
20691 CLI.setDebugLoc(dl).setChain(InChain)
20692 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20694
20695 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20696 return CallInfo.first;
20697}
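// Note on the libcall path (per the AEABI run-time ABI): the __aeabi_idivmod
// and __aeabi_uidivmod helpers return the quotient in r0 and the remainder in
// r1, which is why a single call produces both MERGE_VALUES results above.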
20698
20699// Lowers REM using divmod helpers
20700// see RTABI section 4.2/4.3
20701SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20702 EVT VT = N->getValueType(0);
20703
20704 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20706 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20707 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20708 Result[0], Result[1]);
20709 }
20710
20711 // Build return types (div and rem)
20712 std::vector<Type*> RetTyParams;
20713 Type *RetTyElement;
20714
20715 switch (VT.getSimpleVT().SimpleTy) {
20716 default: llvm_unreachable("Unexpected request for libcall!");
20717 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20718 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20719 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20720 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20721 }
20722
20723 RetTyParams.push_back(RetTyElement);
20724 RetTyParams.push_back(RetTyElement);
20725 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20726 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20727
20728 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20729 SimpleTy);
20730 SDValue InChain = DAG.getEntryNode();
20732 Subtarget);
20733 bool isSigned = N->getOpcode() == ISD::SREM;
20736
20737 if (Subtarget->isTargetWindows())
20738 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20739
20740 // Lower call
20741 CallLoweringInfo CLI(DAG);
20742 CLI.setChain(InChain)
20743 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20745 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20746
20747 // Return second (rem) result operand (first contains div)
20748 SDNode *ResNode = CallResult.first.getNode();
20749 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20750 return ResNode->getOperand(1);
20751}
20752
20753SDValue
20754ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20755 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20756 SDLoc DL(Op);
20757
20758 // Get the inputs.
20759 SDValue Chain = Op.getOperand(0);
20760 SDValue Size = Op.getOperand(1);
20761
20763 "no-stack-arg-probe")) {
20765 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20766 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20767 Chain = SP.getValue(1);
20768 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20769 if (Align)
20770 SP =
20771 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20772 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20773 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20774 SDValue Ops[2] = { SP, Chain };
20775 return DAG.getMergeValues(Ops, DL);
20776 }
20777
20778 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20779 DAG.getConstant(2, DL, MVT::i32));
20780
20781 SDValue Glue;
20782 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20783 Glue = Chain.getValue(1);
20784
20785 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20786 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20787
20788 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20789 Chain = NewSP.getValue(1);
20790
20791 SDValue Ops[2] = { NewSP, Chain };
20792 return DAG.getMergeValues(Ops, DL);
20793}
20794
20795SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20796 bool IsStrict = Op->isStrictFPOpcode();
20797 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20798 const unsigned DstSz = Op.getValueType().getSizeInBits();
20799 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20800 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20801 "Unexpected type for custom-lowering FP_EXTEND");
20802
20803 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20804 "With both FP DP and 16, any FP conversion is legal!");
20805
20806 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20807 "With FP16, 16 to 32 conversion is legal!");
20808
20809 // Converting from 32 -> 64 is valid if we have FP64.
20810 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20811 // FIXME: Remove this when we have strict fp instruction selection patterns
20812 if (IsStrict) {
20813 SDLoc Loc(Op);
20815 Loc, Op.getValueType(), SrcVal);
20816 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20817 }
20818 return Op;
20819 }
20820
20821 // Either we are converting from 16 -> 64, without FP16 and/or
20822 // FP.double-precision or without Armv8-fp. So we must do it in two
20823 // steps.
20824 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20825 // without FP16. So we must do a function call.
20826 SDLoc Loc(Op);
20827 RTLIB::Libcall LC;
20828 MakeLibCallOptions CallOptions;
20829 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20830 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20831 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20832 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20833 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20834 if (Supported) {
20835 if (IsStrict) {
20836 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20837 {DstVT, MVT::Other}, {Chain, SrcVal});
20838 Chain = SrcVal.getValue(1);
20839 } else {
20840 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20841 }
20842 } else {
20843 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20844 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20845 "Unexpected type for custom-lowering FP_EXTEND");
20846 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20847 Loc, Chain);
20848 }
20849 }
20850
20851 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20852}
20853
20854SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20855 bool IsStrict = Op->isStrictFPOpcode();
20856
20857 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20858 EVT SrcVT = SrcVal.getValueType();
20859 EVT DstVT = Op.getValueType();
20860 const unsigned DstSz = Op.getValueType().getSizeInBits();
20861 const unsigned SrcSz = SrcVT.getSizeInBits();
20862 (void)DstSz;
20863 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20864 "Unexpected type for custom-lowering FP_ROUND");
20865
20866 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20867 "With both FP DP and 16, any FP conversion is legal!");
20868
20869 SDLoc Loc(Op);
20870
20871 // Instruction from 32 -> 16 if hasFP16 is valid
20872 if (SrcSz == 32 && Subtarget->hasFP16())
20873 return Op;
20874
20875 // Lib call from 32 -> 16 / 64 -> [32, 16]
20876 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20877 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20878 "Unexpected type for custom-lowering FP_ROUND");
20879 MakeLibCallOptions CallOptions;
20880 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20882 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20883 Loc, Chain);
20884 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20885}
20886
20887bool
20888ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20889 // The ARM target isn't yet aware of offsets.
20890 return false;
20891}
20892
20893bool ARM::isBitFieldInvertedMask(unsigned v) {
20894 if (v == 0xffffffff)
20895 return false;
20896
20897 // there can be 1's on either or both "outsides", all the "inside"
20898 // bits must be 0's
20899 return isShiftedMask_32(~v);
20900}
20901
20902/// isFPImmLegal - Returns true if the target can instruction select the
20903/// specified FP immediate natively. If false, the legalizer will
20904/// materialize the FP immediate as a load from a constant pool.
20905bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20906 bool ForCodeSize) const {
20907 if (!Subtarget->hasVFP3Base())
20908 return false;
20909 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20910 return ARM_AM::getFP16Imm(Imm) != -1;
20911 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20912 ARM_AM::getFP32FP16Imm(Imm) != -1)
20913 return true;
20914 if (VT == MVT::f32)
20915 return ARM_AM::getFP32Imm(Imm) != -1;
20916 if (VT == MVT::f64 && Subtarget->hasFP64())
20917 return ARM_AM::getFP64Imm(Imm) != -1;
20918 return false;
20919}
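// Example: the VFP/NEON modified-immediate form encodes values such as 1.0,
// 0.5 and 2.0 directly in a VMOV.F32, so those are legal here, while a value
// like 0.1 is not encodable and is loaded from a constant pool instead.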
20920
20921/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20922/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20923/// specified in the intrinsic calls.
20924bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20925 const CallInst &I,
20926 MachineFunction &MF,
20927 unsigned Intrinsic) const {
20928 switch (Intrinsic) {
20929 case Intrinsic::arm_neon_vld1:
20930 case Intrinsic::arm_neon_vld2:
20931 case Intrinsic::arm_neon_vld3:
20932 case Intrinsic::arm_neon_vld4:
20933 case Intrinsic::arm_neon_vld2lane:
20934 case Intrinsic::arm_neon_vld3lane:
20935 case Intrinsic::arm_neon_vld4lane:
20936 case Intrinsic::arm_neon_vld2dup:
20937 case Intrinsic::arm_neon_vld3dup:
20938 case Intrinsic::arm_neon_vld4dup: {
20940 // Conservatively set memVT to the entire set of vectors loaded.
20941 auto &DL = I.getDataLayout();
20942 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20943 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20944 Info.ptrVal = I.getArgOperand(0);
20945 Info.offset = 0;
20946 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20947 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20948 // volatile loads with NEON intrinsics not supported
20950 return true;
20951 }
20952 case Intrinsic::arm_neon_vld1x2:
20953 case Intrinsic::arm_neon_vld1x3:
20954 case Intrinsic::arm_neon_vld1x4: {
20956 // Conservatively set memVT to the entire set of vectors loaded.
20957 auto &DL = I.getDataLayout();
20958 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20959 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20960 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
20961 Info.offset = 0;
20962 Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();
20963 // volatile loads with NEON intrinsics not supported
20965 return true;
20966 }
20967 case Intrinsic::arm_neon_vst1:
20968 case Intrinsic::arm_neon_vst2:
20969 case Intrinsic::arm_neon_vst3:
20970 case Intrinsic::arm_neon_vst4:
20971 case Intrinsic::arm_neon_vst2lane:
20972 case Intrinsic::arm_neon_vst3lane:
20973 case Intrinsic::arm_neon_vst4lane: {
20975 // Conservatively set memVT to the entire set of vectors stored.
20976 auto &DL = I.getDataLayout();
20977 unsigned NumElts = 0;
20978 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20979 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20980 if (!ArgTy->isVectorTy())
20981 break;
20982 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20983 }
20984 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20985 Info.ptrVal = I.getArgOperand(0);
20986 Info.offset = 0;
20987 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20988 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20989 // volatile stores with NEON intrinsics not supported
20991 return true;
20992 }
20993 case Intrinsic::arm_neon_vst1x2:
20994 case Intrinsic::arm_neon_vst1x3:
20995 case Intrinsic::arm_neon_vst1x4: {
20997 // Conservatively set memVT to the entire set of vectors stored.
20998 auto &DL = I.getDataLayout();
20999 unsigned NumElts = 0;
21000 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21001 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21002 if (!ArgTy->isVectorTy())
21003 break;
21004 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21005 }
21006 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21007 Info.ptrVal = I.getArgOperand(0);
21008 Info.offset = 0;
21009 Info.align = I.getParamAlign(0).valueOrOne();
21010 // volatile stores with NEON intrinsics not supported
21012 return true;
21013 }
21014 case Intrinsic::arm_mve_vld2q:
21015 case Intrinsic::arm_mve_vld4q: {
21017 // Conservatively set memVT to the entire set of vectors loaded.
21018 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21019 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21020 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21021 Info.ptrVal = I.getArgOperand(0);
21022 Info.offset = 0;
21023 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21024 // volatile loads with MVE intrinsics not supported
21026 return true;
21027 }
21028 case Intrinsic::arm_mve_vst2q:
21029 case Intrinsic::arm_mve_vst4q: {
21031 // Conservatively set memVT to the entire set of vectors stored.
21032 Type *VecTy = I.getArgOperand(1)->getType();
21033 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21034 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21035 Info.ptrVal = I.getArgOperand(0);
21036 Info.offset = 0;
21037 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21038 // volatile stores with MVE intrinsics not supported
21040 return true;
21041 }
21042 case Intrinsic::arm_mve_vldr_gather_base:
21043 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21045 Info.ptrVal = nullptr;
21046 Info.memVT = MVT::getVT(I.getType());
21047 Info.align = Align(1);
21049 return true;
21050 }
21051 case Intrinsic::arm_mve_vldr_gather_base_wb:
21052 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21054 Info.ptrVal = nullptr;
21055 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21056 Info.align = Align(1);
21058 return true;
21059 }
21060 case Intrinsic::arm_mve_vldr_gather_offset:
21061 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21063 Info.ptrVal = nullptr;
21064 MVT DataVT = MVT::getVT(I.getType());
21065 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21066 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21067 DataVT.getVectorNumElements());
21068 Info.align = Align(1);
21070 return true;
21071 }
21072 case Intrinsic::arm_mve_vstr_scatter_base:
21073 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21075 Info.ptrVal = nullptr;
21076 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21077 Info.align = Align(1);
21079 return true;
21080 }
21081 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21082 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21084 Info.ptrVal = nullptr;
21085 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21086 Info.align = Align(1);
21088 return true;
21089 }
21090 case Intrinsic::arm_mve_vstr_scatter_offset:
21091 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21093 Info.ptrVal = nullptr;
21094 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21095 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21096 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21097 DataVT.getVectorNumElements());
21098 Info.align = Align(1);
21100 return true;
21101 }
21102 case Intrinsic::arm_ldaex:
21103 case Intrinsic::arm_ldrex: {
21104 auto &DL = I.getDataLayout();
21105 Type *ValTy = I.getParamElementType(0);
21107 Info.memVT = MVT::getVT(ValTy);
21108 Info.ptrVal = I.getArgOperand(0);
21109 Info.offset = 0;
21110 Info.align = DL.getABITypeAlign(ValTy);
21112 return true;
21113 }
21114 case Intrinsic::arm_stlex:
21115 case Intrinsic::arm_strex: {
21116 auto &DL = I.getDataLayout();
21117 Type *ValTy = I.getParamElementType(1);
21119 Info.memVT = MVT::getVT(ValTy);
21120 Info.ptrVal = I.getArgOperand(1);
21121 Info.offset = 0;
21122 Info.align = DL.getABITypeAlign(ValTy);
21124 return true;
21125 }
21126 case Intrinsic::arm_stlexd:
21127 case Intrinsic::arm_strexd:
21129 Info.memVT = MVT::i64;
21130 Info.ptrVal = I.getArgOperand(2);
21131 Info.offset = 0;
21132 Info.align = Align(8);
21134 return true;
21135
21136 case Intrinsic::arm_ldaexd:
21137 case Intrinsic::arm_ldrexd:
21139 Info.memVT = MVT::i64;
21140 Info.ptrVal = I.getArgOperand(0);
21141 Info.offset = 0;
21142 Info.align = Align(8);
21144 return true;
21145
21146 default:
21147 break;
21148 }
21149
21150 return false;
21151}
21152
21153/// Returns true if it is beneficial to convert a load of a constant
21154/// to just the constant itself.
21155bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21156 Type *Ty) const {
21157 assert(Ty->isIntegerTy());
21158
21159 unsigned Bits = Ty->getPrimitiveSizeInBits();
21160 if (Bits == 0 || Bits > 32)
21161 return false;
21162 return true;
21163}
21164
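// For example, with SrcVT = v4i32 and ResVT = v2i32 this returns true only for
// Index 0 or 2, i.e. the low or high half that maps directly onto a
// D subregister of the source Q register.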
21165bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
21166 unsigned Index) const {
21167 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
21168 return false;
21169
21170 return (Index == 0 || Index == ResVT.getVectorNumElements());
21171}
21172
21173Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21174 ARM_MB::MemBOpt Domain) const {
21175 // First, if the target has no DMB, see what fallback we can use.
21176 if (!Subtarget->hasDataBarrier()) {
21177 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21178 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21179 // here.
21180 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21181 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21182 Builder.getInt32(0), Builder.getInt32(7),
21183 Builder.getInt32(10), Builder.getInt32(5)};
21184 return Builder.CreateIntrinsic(Intrinsic::arm_mcr, {}, args);
21185 } else {
21186 // Instead of using barriers, atomic accesses on these subtargets use
21187 // libcalls.
21188 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21189 }
21190 } else {
21191 // Only a full system barrier exists in the M-class architectures.
21192 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21193 Constant *CDomain = Builder.getInt32(Domain);
21194 return Builder.CreateIntrinsic(Intrinsic::arm_dmb, {}, CDomain);
21195 }
21196}
21197
21198// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
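// For illustration, the net effect of the leading and trailing fences emitted
// here and in emitTrailingFence below (assuming a DMB-capable target) is
// roughly:
//   seq_cst store        -> dmb ish; str; dmb ish
//   release store        -> dmb ish; str
//   acquire/seq_cst load -> ldr; dmb ish
//   monotonic load/store -> no barrier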
21199Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21200 Instruction *Inst,
21201 AtomicOrdering Ord) const {
21202 switch (Ord) {
21203 case AtomicOrdering::NotAtomic:
21204 case AtomicOrdering::Unordered:
21205 llvm_unreachable("Invalid fence: unordered/non-atomic");
21206 case AtomicOrdering::Monotonic:
21207 case AtomicOrdering::Acquire:
21208 return nullptr; // Nothing to do
21209 case AtomicOrdering::SequentiallyConsistent:
21210 if (!Inst->hasAtomicStore())
21211 return nullptr; // Nothing to do
21212 [[fallthrough]];
21213 case AtomicOrdering::Release:
21214 case AtomicOrdering::AcquireRelease:
21215 if (Subtarget->preferISHSTBarriers())
21216 return makeDMB(Builder, ARM_MB::ISHST);
21217 // FIXME: add a comment with a link to documentation justifying this.
21218 else
21219 return makeDMB(Builder, ARM_MB::ISH);
21220 }
21221 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21222}
21223
21224Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21225 Instruction *Inst,
21226 AtomicOrdering Ord) const {
21227 switch (Ord) {
21228 case AtomicOrdering::NotAtomic:
21229 case AtomicOrdering::Unordered:
21230 llvm_unreachable("Invalid fence: unordered/not-atomic");
21231 case AtomicOrdering::Monotonic:
21232 case AtomicOrdering::Release:
21233 return nullptr; // Nothing to do
21234 case AtomicOrdering::Acquire:
21235 case AtomicOrdering::AcquireRelease:
21236 case AtomicOrdering::SequentiallyConsistent:
21237 return makeDMB(Builder, ARM_MB::ISH);
21238 }
21239 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21240}
21241
21242// Loads and stores less than 64-bits are already atomic; ones above that
21243// are doomed anyway, so defer to the default libcall and blame the OS when
21244// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21245// anything for those.
21246TargetLoweringBase::AtomicExpansionKind
21247ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
21248 bool has64BitAtomicStore;
21249 if (Subtarget->isMClass())
21250 has64BitAtomicStore = false;
21251 else if (Subtarget->isThumb())
21252 has64BitAtomicStore = Subtarget->hasV7Ops();
21253 else
21254 has64BitAtomicStore = Subtarget->hasV6Ops();
21255
21256 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21257 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21258 : AtomicExpansionKind::None;
21259}
21260
21261// Loads and stores less than 64-bits are already atomic; ones above that
21262// are doomed anyway, so defer to the default libcall and blame the OS when
21263// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21264// anything for those.
21265// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21266// guarantee, see DDI0406C ARM architecture reference manual,
21267// sections A8.8.72-74 LDRD)
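// For illustration: a 64-bit atomic load is expanded to an ldrexd-based
// sequence on ARMv7 A/R-class cores, whereas on Cortex-M it is left to the
// generic __atomic libcall (e.g. __atomic_load_8).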
21268TargetLoweringBase::AtomicExpansionKind
21269ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
21270 bool has64BitAtomicLoad;
21271 if (Subtarget->isMClass())
21272 has64BitAtomicLoad = false;
21273 else if (Subtarget->isThumb())
21274 has64BitAtomicLoad = Subtarget->hasV7Ops();
21275 else
21276 has64BitAtomicLoad = Subtarget->hasV6Ops();
21277
21278 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21279 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21280 : AtomicExpansionKind::None;
21281}
21282
21283// For the real atomic operations, we have ldrex/strex up to 32 bits,
21284// and up to 64 bits on the non-M profiles
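// For illustration, the LL/SC expansion of e.g. "atomicrmw add ptr %p, i32 %x"
// on a v7 target has roughly this shape (pseudo-assembly, for exposition):
//   1: ldrex   r1, [r0]
//      add     r1, r1, r2
//      strex   r3, r1, [r0]
//      cmp     r3, #0
//      bne     1b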
21285TargetLowering::AtomicExpansionKind
21286ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21287 if (AI->isFloatingPointOperation())
21288 return AtomicExpansionKind::CmpXChg;
21289 
21290 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21291 bool hasAtomicRMW;
21292 if (Subtarget->isMClass())
21293 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21294 else if (Subtarget->isThumb())
21295 hasAtomicRMW = Subtarget->hasV7Ops();
21296 else
21297 hasAtomicRMW = Subtarget->hasV6Ops();
21298 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21299 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21300 // implement atomicrmw without spilling. If the target address is also on
21301 // the stack and close enough to the spill slot, this can lead to a
21302 // situation where the monitor always gets cleared and the atomic operation
21303 // can never succeed. So at -O0 lower this operation to a CAS loop.
21304 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21305 return AtomicExpansionKind::CmpXChg;
21306 return AtomicExpansionKind::LLSC;
21307 }
21308 return AtomicExpansionKind::None;
21309}
21310
21311// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21312// bits, and up to 64 bits on the non-M profiles.
21313TargetLowering::AtomicExpansionKind
21314ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
21315 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21316 // implement cmpxchg without spilling. If the address being exchanged is also
21317 // on the stack and close enough to the spill slot, this can lead to a
21318 // situation where the monitor always gets cleared and the atomic operation
21319 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21320 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21321 bool HasAtomicCmpXchg;
21322 if (Subtarget->isMClass())
21323 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21324 else if (Subtarget->isThumb())
21325 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21326 else
21327 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21328 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21329 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21330 return AtomicExpansionKind::LLSC;
21331 return AtomicExpansionKind::None;
21332}
21333
21334bool ARMTargetLowering::shouldInsertFencesForAtomic(
21335 const Instruction *I) const {
21336 return InsertFencesForAtomic;
21337}
21338
21339bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const {
21340 // ROPI/RWPI are not supported currently.
21341 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21342}
21343
21344void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21345 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21346 return TargetLowering::insertSSPDeclarations(M);
21347 
21348 // MSVC CRT has a global variable holding security cookie.
21349 M.getOrInsertGlobal("__security_cookie",
21350 PointerType::getUnqual(M.getContext()));
21351
21352 // MSVC CRT has a function to validate security cookie.
21353 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21354 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21355 PointerType::getUnqual(M.getContext()));
21356 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21357 F->addParamAttr(0, Attribute::AttrKind::InReg);
21358}
21359
21360Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
21361 // MSVC CRT has a global variable holding security cookie.
21362 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21363 return M.getGlobalVariable("__security_cookie");
21364 return TargetLowering::getSDagStackGuard(M);
21365}
21366
21367Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21368 // MSVC CRT has a function to validate security cookie.
21369 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21370 return M.getFunction("__security_check_cookie");
21371 return TargetLowering::getSSPStackGuardCheck(M);
21372}
21373
21374bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21375 unsigned &Cost) const {
21376 // If we do not have NEON, vector types are not natively supported.
21377 if (!Subtarget->hasNEON())
21378 return false;
21379
21380 // Floating point values and vector values map to the same register file.
21381 // Therefore, although we could do a store + extract on a vector type, it is
21382 // better to leave it as a float, as we have more freedom in the addressing
21383 // modes for those.
21384 if (VectorTy->isFPOrFPVectorTy())
21385 return false;
21386
21387 // If the index is unknown at compile time, this is very expensive to lower
21388 // and it is not possible to combine the store with the extract.
21389 if (!isa<ConstantInt>(Idx))
21390 return false;
21391
21392 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21393 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21394 // We can do a store + vector extract on any vector that fits perfectly in a D
21395 // or Q register.
21396 if (BitWidth == 64 || BitWidth == 128) {
21397 Cost = 0;
21398 return true;
21399 }
21400 return false;
21401}
21402
21403bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21404 return Subtarget->hasV6T2Ops();
21405}
21406
21407bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21408 return Subtarget->hasV6T2Ops();
21409}
21410
21411bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
21412 const Instruction &AndI) const {
21413 if (!Subtarget->hasV7Ops())
21414 return false;
21415
21416 // Sink the `and` instruction only if the mask would fit into a modified
21417 // immediate operand.
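  // For example, a mask of 0xFF00 can be encoded as a modified immediate, so
  // folding the `and` is considered beneficial, while 0x12345 cannot be
  // encoded and is rejected.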
21418 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21419 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21420 return false;
21421 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21422 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21423 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21424}
21425
21426TargetLowering::ShiftLegalizationStrategy
21427ARMTargetLowering::preferredShiftLegalizationStrategy(
21428 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21429 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21430 return ShiftLegalizationStrategy::LowerToLibcall;
21431 return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
21432 ExpansionFactor);
21433}
21434
21435Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21436 Value *Addr,
21437 AtomicOrdering Ord) const {
21438 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21439 bool IsAcquire = isAcquireOrStronger(Ord);
21440
21441 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21442 // intrinsic must return {i32, i32} and we have to recombine them into a
21443 // single i64 here.
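  // For illustration, a 64-bit acquire load-linked conceptually becomes:
  //   %lohi = call { i32, i32 } @llvm.arm.ldaexd(ptr %addr)
  //   %lo   = zext i32 (low half)  to i64
  //   %hi   = zext i32 (high half) to i64
  //   %val  = or i64 %lo, shl(i64 %hi, 32)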
21444 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21445 Intrinsic::ID Int =
21446 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21447
21448 Value *LoHi =
21449 Builder.CreateIntrinsic(Int, {}, Addr, /*FMFSource=*/nullptr, "lohi");
21450
21451 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21452 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21453 if (!Subtarget->isLittle())
21454 std::swap (Lo, Hi);
21455 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21456 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21457 return Builder.CreateOr(
21458 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21459 }
21460
21461 Type *Tys[] = { Addr->getType() };
21462 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21463 CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
21464
21465 CI->addParamAttr(
21466 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21467 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21468}
21469
21470void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21471 IRBuilderBase &Builder) const {
21472 if (!Subtarget->hasV7Ops())
21473 return;
21474 Builder.CreateIntrinsic(Intrinsic::arm_clrex, {}, {});
21475}
21476
21477Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21478 Value *Val, Value *Addr,
21479 AtomicOrdering Ord) const {
21480 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21481 bool IsRelease = isReleaseOrStronger(Ord);
21482
21483 // Since the intrinsics must have legal type, the i64 intrinsics take two
21484 // parameters: "i32, i32". We must marshal Val into the appropriate form
21485 // before the call.
21486 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21487 Intrinsic::ID Int =
21488 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21489 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21490
21491 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21492 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21493 if (!Subtarget->isLittle())
21494 std::swap(Lo, Hi);
21495 return Builder.CreateIntrinsic(Int, {}, {Lo, Hi, Addr});
21496 }
21497
21498 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21499 Type *Tys[] = { Addr->getType() };
21500 Function *Strex = Intrinsic::getOrInsertDeclaration(M, Int, Tys);
21501 
21502 CallInst *CI = Builder.CreateCall(
21503 Strex, {Builder.CreateZExtOrBitCast(
21504 Val, Strex->getFunctionType()->getParamType(0)),
21505 Addr});
21506 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21507 Val->getType()));
21508 return CI;
21509}
21510
21511
21512bool ARMTargetLowering::alignLoopsWithOptSize() const {
21513 return Subtarget->isMClass();
21514}
21515
21516/// A helper function for determining the number of interleaved accesses we
21517/// will generate when lowering accesses of the given type.
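/// For example, a 512-bit vector such as <16 x i32> needs (512 + 127) / 128 = 4
/// accesses, while a 64-bit vector such as <8 x i8> needs just one.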
21518unsigned
21519ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21520 const DataLayout &DL) const {
21521 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21522}
21523
21524bool ARMTargetLowering::isLegalInterleavedAccessType(
21525 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21526 const DataLayout &DL) const {
21527
21528 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21529 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21530
21531 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21532 return false;
21533
21534 // Ensure the vector doesn't have f16 elements. Even though we could do an
21535 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21536 // f32.
21537 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21538 return false;
21539 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21540 return false;
21541
21542 // Ensure the number of vector elements is greater than 1.
21543 if (VecTy->getNumElements() < 2)
21544 return false;
21545
21546 // Ensure the element type is legal.
21547 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21548 return false;
21549 // And that the alignment is high enough under MVE.
21550 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21551 return false;
21552
21553 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21554 // 128 will be split into multiple interleaved accesses.
21555 if (Subtarget->hasNEON() && VecSize == 64)
21556 return true;
21557 return VecSize % 128 == 0;
21558}
21559
21560unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
21561 if (Subtarget->hasNEON())
21562 return 4;
21563 if (Subtarget->hasMVEIntegerOps())
21564 return MVEMaxSupportedInterleaveFactor;
21565 return 2;
21566}
21567
21568/// Lower an interleaved load into a vldN intrinsic.
21569///
21570/// E.g. Lower an interleaved load (Factor = 2):
21571/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21572/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21573/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21574///
21575/// Into:
21576/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21577/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21578/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21579bool ARMTargetLowering::lowerInterleavedLoad(
21580 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21581 ArrayRef<unsigned> Indices, unsigned Factor) const {
21582 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21583 "Invalid interleave factor");
21584 assert(!Shuffles.empty() && "Empty shufflevector input");
21585 assert(Shuffles.size() == Indices.size() &&
21586 "Unmatched number of shufflevectors and indices");
21587
21588 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21589 Type *EltTy = VecTy->getElementType();
21590
21591 const DataLayout &DL = LI->getDataLayout();
21592 Align Alignment = LI->getAlign();
21593
21594 // Skip if we do not have NEON and skip illegal vector types. We can
21595 // "legalize" wide vector types into multiple interleaved accesses as long as
21596 // the vector types are divisible by 128.
21597 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21598 return false;
21599
21600 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21601
21602 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21603 // load integer vectors first and then convert to pointer vectors.
21604 if (EltTy->isPointerTy())
21605 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21606
21607 IRBuilder<> Builder(LI);
21608
21609 // The base address of the load.
21610 Value *BaseAddr = LI->getPointerOperand();
21611
21612 if (NumLoads > 1) {
21613 // If we're going to generate more than one load, reset the sub-vector type
21614 // to something legal.
21615 VecTy = FixedVectorType::get(VecTy->getElementType(),
21616 VecTy->getNumElements() / NumLoads);
21617 }
21618
21619 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21620
21621 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21622 if (Subtarget->hasNEON()) {
21623 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21624 Type *Tys[] = {VecTy, PtrTy};
21625 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21626 Intrinsic::arm_neon_vld3,
21627 Intrinsic::arm_neon_vld4};
21628
21629 SmallVector<Value *, 2> Ops;
21630 Ops.push_back(BaseAddr);
21631 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21632
21633 return Builder.CreateIntrinsic(LoadInts[Factor - 2], Tys, Ops,
21634 /*FMFSource=*/nullptr, "vldN");
21635 } else {
21636 assert((Factor == 2 || Factor == 4) &&
21637 "expected interleave factor of 2 or 4 for MVE");
21638 Intrinsic::ID LoadInts =
21639 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21640 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21641 Type *Tys[] = {VecTy, PtrTy};
21642
21643 SmallVector<Value *, 2> Ops;
21644 Ops.push_back(BaseAddr);
21645 return Builder.CreateIntrinsic(LoadInts, Tys, Ops, /*FMFSource=*/nullptr,
21646 "vldN");
21647 }
21648 };
21649
21650 // Holds sub-vectors extracted from the load intrinsic return values. The
21651 // sub-vectors are associated with the shufflevector instructions they will
21652 // replace.
21653 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21654 
21655 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21656 // If we're generating more than one load, compute the base address of
21657 // subsequent loads as an offset from the previous.
21658 if (LoadCount > 0)
21659 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21660 VecTy->getNumElements() * Factor);
21661
21662 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21663
21664 // Replace uses of each shufflevector with the corresponding vector loaded
21665 // by ldN.
21666 for (unsigned i = 0; i < Shuffles.size(); i++) {
21667 ShuffleVectorInst *SV = Shuffles[i];
21668 unsigned Index = Indices[i];
21669
21670 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21671
21672 // Convert the integer vector to pointer vector if the element is pointer.
21673 if (EltTy->isPointerTy())
21674 SubVec = Builder.CreateIntToPtr(
21675 SubVec,
21676 FixedVectorType::get(SV->getType()->getElementType(), VecTy));
21677 
21678 SubVecs[SV].push_back(SubVec);
21679 }
21680 }
21681
21682 // Replace uses of the shufflevector instructions with the sub-vectors
21683 // returned by the load intrinsic. If a shufflevector instruction is
21684 // associated with more than one sub-vector, those sub-vectors will be
21685 // concatenated into a single wide vector.
21686 for (ShuffleVectorInst *SVI : Shuffles) {
21687 auto &SubVec = SubVecs[SVI];
21688 auto *WideVec =
21689 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21690 SVI->replaceAllUsesWith(WideVec);
21691 }
21692
21693 return true;
21694}
21695
21696/// Lower an interleaved store into a vstN intrinsic.
21697///
21698/// E.g. Lower an interleaved store (Factor = 3):
21699/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21700/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21701/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21702///
21703/// Into:
21704/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21705/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21706/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21707/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21708///
21709/// Note that the new shufflevectors will be removed and we'll only generate one
21710/// vst3 instruction in CodeGen.
21711///
21712/// Example for a more general valid mask (Factor 3). Lower:
21713/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21714/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21715/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21716///
21717/// Into:
21718/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21719/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21720/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21721/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21722bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
21723 ShuffleVectorInst *SVI,
21724 unsigned Factor) const {
21725 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21726 "Invalid interleave factor");
21727
21728 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21729 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21730
21731 unsigned LaneLen = VecTy->getNumElements() / Factor;
21732 Type *EltTy = VecTy->getElementType();
21733 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21734
21735 const DataLayout &DL = SI->getDataLayout();
21736 Align Alignment = SI->getAlign();
21737
21738 // Skip if we do not have NEON and skip illegal vector types. We can
21739 // "legalize" wide vector types into multiple interleaved accesses as long as
21740 // the vector types are divisible by 128.
21741 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21742 return false;
21743
21744 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21745
21746 Value *Op0 = SVI->getOperand(0);
21747 Value *Op1 = SVI->getOperand(1);
21748 IRBuilder<> Builder(SI);
21749
21750 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21751 // vectors to integer vectors.
21752 if (EltTy->isPointerTy()) {
21753 Type *IntTy = DL.getIntPtrType(EltTy);
21754
21755 // Convert to the corresponding integer vector.
21756 auto *IntVecTy =
21757 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21758 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21759 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21760
21761 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21762 }
21763
21764 // The base address of the store.
21765 Value *BaseAddr = SI->getPointerOperand();
21766
21767 if (NumStores > 1) {
21768 // If we're going to generate more than one store, reset the lane length
21769 // and sub-vector type to something legal.
21770 LaneLen /= NumStores;
21771 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21772 }
21773
21774 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21775
21776 auto Mask = SVI->getShuffleMask();
21777
21778 auto createStoreIntrinsic = [&](Value *BaseAddr,
21779 SmallVectorImpl<Value *> &Shuffles) {
21780 if (Subtarget->hasNEON()) {
21781 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21782 Intrinsic::arm_neon_vst3,
21783 Intrinsic::arm_neon_vst4};
21784 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21785 Type *Tys[] = {PtrTy, SubVecTy};
21786
21787 SmallVector<Value *, 6> Ops;
21788 Ops.push_back(BaseAddr);
21789 append_range(Ops, Shuffles);
21790 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21791 Builder.CreateIntrinsic(StoreInts[Factor - 2], Tys, Ops);
21792 } else {
21793 assert((Factor == 2 || Factor == 4) &&
21794 "expected interleave factor of 2 or 4 for MVE");
21795 Intrinsic::ID StoreInts =
21796 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21797 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21798 Type *Tys[] = {PtrTy, SubVecTy};
21799
21800 SmallVector<Value *, 6> Ops;
21801 Ops.push_back(BaseAddr);
21802 append_range(Ops, Shuffles);
21803 for (unsigned F = 0; F < Factor; F++) {
21804 Ops.push_back(Builder.getInt32(F));
21805 Builder.CreateIntrinsic(StoreInts, Tys, Ops);
21806 Ops.pop_back();
21807 }
21808 }
21809 };
21810
21811 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21812 // If we're generating more than one store, we compute the base address of
21813 // subsequent stores as an offset from the previous.
21814 if (StoreCount > 0)
21815 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21816 BaseAddr, LaneLen * Factor);
21817
21818 SmallVector<Value *, 4> Shuffles;
21819
21820 // Split the shufflevector operands into sub vectors for the new vstN call.
21821 for (unsigned i = 0; i < Factor; i++) {
21822 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21823 if (Mask[IdxI] >= 0) {
21824 Shuffles.push_back(Builder.CreateShuffleVector(
21825 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21826 } else {
21827 unsigned StartMask = 0;
21828 for (unsigned j = 1; j < LaneLen; j++) {
21829 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21830 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21831 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21832 break;
21833 }
21834 }
21835 // Note: If all elements in a chunk are undefs, StartMask=0!
21836 // Note: Filling undef gaps with random elements is ok, since
21837 // those elements were being written anyway (with undefs).
21838 // In the case of all undefs we're defaulting to using elems from 0
21839 // Note: StartMask cannot be negative, it's checked in
21840 // isReInterleaveMask
21841 Shuffles.push_back(Builder.CreateShuffleVector(
21842 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21843 }
21844 }
21845
21846 createStoreIntrinsic(BaseAddr, Shuffles);
21847 }
21848 return true;
21849}
21850
21851enum HABaseType {
21852 HA_UNKNOWN = 0,
21853 HA_FLOAT,
21854 HA_DOUBLE,
21855 HA_VECT64,
21856 HA_VECT128
21857};
21858 
21859static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21860 uint64_t &Members) {
21861 if (auto *ST = dyn_cast<StructType>(Ty)) {
21862 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21863 uint64_t SubMembers = 0;
21864 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21865 return false;
21866 Members += SubMembers;
21867 }
21868 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21869 uint64_t SubMembers = 0;
21870 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21871 return false;
21872 Members += SubMembers * AT->getNumElements();
21873 } else if (Ty->isFloatTy()) {
21874 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21875 return false;
21876 Members = 1;
21877 Base = HA_FLOAT;
21878 } else if (Ty->isDoubleTy()) {
21879 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21880 return false;
21881 Members = 1;
21882 Base = HA_DOUBLE;
21883 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21884 Members = 1;
21885 switch (Base) {
21886 case HA_FLOAT:
21887 case HA_DOUBLE:
21888 return false;
21889 case HA_VECT64:
21890 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21891 case HA_VECT128:
21892 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21893 case HA_UNKNOWN:
21894 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21895 case 64:
21896 Base = HA_VECT64;
21897 return true;
21898 case 128:
21899 Base = HA_VECT128;
21900 return true;
21901 default:
21902 return false;
21903 }
21904 }
21905 }
21906
21907 return (Members > 0 && Members <= 4);
21908}
21909
21910/// Return the correct alignment for the current calling convention.
21911Align ARMTargetLowering::getABIAlignmentForCallingConv(
21912 Type *ArgTy, const DataLayout &DL) const {
21913 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21914 if (!ArgTy->isVectorTy())
21915 return ABITypeAlign;
21916
21917 // Avoid over-aligning vector parameters. It would require realigning the
21918 // stack and waste space for no real benefit.
21919 MaybeAlign StackAlign = DL.getStackAlignment();
21920 assert(StackAlign && "data layout string is missing stack alignment");
21921 return std::min(ABITypeAlign, *StackAlign);
21922}
21923
21924/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21925/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21926/// passing according to AAPCS rules.
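/// For illustration: a struct of three floats (a float HA with three members)
/// or a [4 x i32] array qualifies, whereas a struct mixing a float and an i32
/// does not.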
21927bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
21928 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
21929 const DataLayout &DL) const {
21930 if (getEffectiveCallingConv(CallConv, isVarArg) !=
21931 CallingConv::ARM_AAPCS_VFP)
21932 return false;
21933 
21934 HABaseType Base = HA_UNKNOWN;
21935 uint64_t Members = 0;
21936 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
21937 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
21938
21939 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
21940 return IsHA || IsIntArray;
21941}
21942
21943Register ARMTargetLowering::getExceptionPointerRegister(
21944 const Constant *PersonalityFn) const {
21945 // Platforms which do not use SjLj EH may return values in these registers
21946 // via the personality function.
21947 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
21948}
21949
21950Register ARMTargetLowering::getExceptionSelectorRegister(
21951 const Constant *PersonalityFn) const {
21952 // Platforms which do not use SjLj EH may return values in these registers
21953 // via the personality function.
21954 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
21955}
21956
21957void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
21958 // Update IsSplitCSR in ARMFunctionInfo.
21959 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
21960 AFI->setIsSplitCSR(true);
21961}
21962
21963void ARMTargetLowering::insertCopiesSplitCSR(
21964 MachineBasicBlock *Entry,
21965 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
21966 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
21967 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
21968 if (!IStart)
21969 return;
21970
21971 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
21972 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
21973 MachineBasicBlock::iterator MBBI = Entry->begin();
21974 for (const MCPhysReg *I = IStart; *I; ++I) {
21975 const TargetRegisterClass *RC = nullptr;
21976 if (ARM::GPRRegClass.contains(*I))
21977 RC = &ARM::GPRRegClass;
21978 else if (ARM::DPRRegClass.contains(*I))
21979 RC = &ARM::DPRRegClass;
21980 else
21981 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
21982
21983 Register NewVR = MRI->createVirtualRegister(RC);
21984 // Create copy from CSR to a virtual register.
21985 // FIXME: this currently does not emit CFI pseudo-instructions, it works
21986 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
21987 // nounwind. If we want to generalize this later, we may need to emit
21988 // CFI pseudo-instructions.
21989 assert(Entry->getParent()->getFunction().hasFnAttribute(
21990 Attribute::NoUnwind) &&
21991 "Function should be nounwind in insertCopiesSplitCSR!");
21992 Entry->addLiveIn(*I);
21993 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
21994 .addReg(*I);
21995
21996 // Insert the copy-back instructions right before the terminator.
21997 for (auto *Exit : Exits)
21998 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
21999 TII->get(TargetOpcode::COPY), *I)
22000 .addReg(NewVR);
22001 }
22002}
22003
22004void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
22005 MF.getFrameInfo().computeMaxCallFrameSize(MF);
22006 TargetLoweringBase::finalizeLowering(MF);
22007}
22008
22009bool ARMTargetLowering::isComplexDeinterleavingSupported() const {
22010 return Subtarget->hasMVEIntegerOps();
22011}
22012
22013bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
22014 ComplexDeinterleavingOperation Operation, Type *Ty) const {
22015 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22016 if (!VTy)
22017 return false;
22018
22019 auto *ScalarTy = VTy->getScalarType();
22020 unsigned NumElements = VTy->getNumElements();
22021
22022 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22023 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22024 return false;
22025
22026 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22027 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22028 return Subtarget->hasMVEFloatOps();
22029
22030 if (Operation != ComplexDeinterleavingOperation::CAdd)
22031 return false;
22032
22033 return Subtarget->hasMVEIntegerOps() &&
22034 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22035 ScalarTy->isIntegerTy(32));
22036}
22037
22038Value *ARMTargetLowering::createComplexDeinterleavingIR(
22039 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
22040 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22041 Value *Accumulator) const {
22042
22043 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22044
22045 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22046
22047 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22048
22049 if (TyWidth > 128) {
22050 int Stride = Ty->getNumElements() / 2;
22051 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22052 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22053 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22054 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22055
22056 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22057 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22058 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22059 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22060 Value *LowerSplitAcc = nullptr;
22061 Value *UpperSplitAcc = nullptr;
22062
22063 if (Accumulator) {
22064 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22065 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22066 }
22067
22068 auto *LowerSplitInt = createComplexDeinterleavingIR(
22069 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22070 auto *UpperSplitInt = createComplexDeinterleavingIR(
22071 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22072
22073 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22074 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22075 }
22076
22077 auto *IntTy = Type::getInt32Ty(B.getContext());
22078
22079 ConstantInt *ConstRotation = nullptr;
22080 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22081 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22082
22083 if (Accumulator)
22084 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22085 {ConstRotation, Accumulator, InputB, InputA});
22086 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22087 {ConstRotation, InputB, InputA});
22088 }
22089
22090 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22091 // 1 means the value is not halved.
22092 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22093
22094 if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
22095 ConstRotation = ConstantInt::get(IntTy, 0);
22096 else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22097 ConstRotation = ConstantInt::get(IntTy, 1);
22098
22099 if (!ConstRotation)
22100 return nullptr; // Invalid rotation for arm_mve_vcaddq
22101
22102 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22103 {ConstHalving, ConstRotation, InputA, InputB});
22104 }
22105
22106 return nullptr;
22107}
unsigned const MachineRegisterInfo * MRI
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
#define MAKE_CASE(V)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
unsigned RegSize
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static bool isConstant(const MachineInstr &MI)
static const LLT S1
static const LLT F64
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
static bool isStore(int Opcode)
static bool isThumb(const MCSubtargetInfo &STI)
static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
@ HA_DOUBLE
@ HA_VECT128
@ HA_VECT64
@ HA_FLOAT
@ HA_UNKNOWN
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static const APInt * isPowerOf2Constant(SDValue V)
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static bool isValidMVECond(unsigned CC, bool IsFloat)
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isGTorGE(ISD::CondCode CC)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static bool isReverseMask(ArrayRef< int > M, EVT VT)
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations...
static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0, SDValue V1)
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool CanInvertMVEVCMP(SDValue N)
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static EVT getVectorTyFromPredicateVector(EVT VT)
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg, SelectionDAG &DAG, const SDLoc &DL)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V)
static bool isLTorLE(ISD::CondCode CC)
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static const MCPhysReg GPRArgRegs[]
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
ShuffleOpCodes
@ OP_VEXT3
@ OP_VTRNR
@ OP_VDUP1
@ OP_VZIPR
@ OP_VUZPR
@ OP_VREV
@ OP_VZIPL
@ OP_VTRNL
@ OP_COPY
@ OP_VEXT1
@ OP_VDUP0
@ OP_VEXT2
@ OP_VUZPL
@ OP_VDUP3
@ OP_VDUP2
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
constexpr MVT FlagsVT
Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isZeroVector(SDValue N)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
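The core rewrite can be pictured with the following hedged sketch (assumed helper name, no profitability checks); the real combine only fires when the subtarget's multiply-accumulate patterns make the distributed form cheaper:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch only: distribute (A + B) * C into (A * C) + (B * C) on a MUL node
// whose first operand is an ADD.
static SDValue distributeMulOverAddSketch(SDNode *N, SelectionDAG &DAG) {
  SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
  if (LHS.getOpcode() != ISD::ADD)
    return SDValue();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue AC = DAG.getNode(ISD::MUL, DL, VT, LHS.getOperand(0), RHS);
  SDValue BC = DAG.getNode(ISD::MUL, DL, VT, LHS.getOperand(1), RHS);
  return DAG.getNode(ISD::ADD, DL, VT, AC, BC);
}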
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
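For orientation, the masks these predicates recognize on a 4-element vector follow the standard NEON semantics (illustrative values, not copied from the source):

// Indices refer to the concatenation of the two shuffle inputs.
//   VUZP (de-interleave): <0, 2, 4, 6> and <1, 3, 5, 7>
//   VZIP (interleave):    <0, 4, 1, 5> and <2, 6, 3, 7>
//   VTRN (transpose):     <0, 4, 2, 6> and <1, 5, 3, 7>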
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
Function Alias Analysis Results
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
This file contains the declarations for the Module class.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
static bool isIntrinsic(const CallBase &Call, Intrinsic::ID ID)
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1479
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1321
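A small usage example for bitcastToAPInt (hypothetical helper name; the bit-pattern check assumes IEEE-754 single precision):

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
using namespace llvm;

// Returns true if F carries exactly the bit pattern of -0.0f (sign bit only).
static bool isNegativeZeroBitsSketch(const APFloat &F) {
  APInt Bits = F.bitcastToAPInt();
  return Bits.getBitWidth() == 32 && Bits == APInt(32, 0x80000000u);
}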
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
unsigned logBase2() const
Definition: APInt.h:1739
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
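A quick, self-contained illustration of several of the APInt helpers listed above (the values are arbitrary):

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

static void apintSketch() {
  APInt Mask = APInt::getHighBitsSet(32, 8);         // 0xFF000000
  assert(Mask.countr_zero() == 24 && Mask.popcount() == 8);
  assert(!Mask.isPowerOf2());
  APInt Low = APInt::getLowBitsSet(32, 16);          // 0x0000FFFF
  assert(Low.isSubsetOf(APInt::getAllOnes(32)));
  APInt Splat = APInt::getSplat(64, APInt(8, 0xAB)); // 0xABAB...AB
  assert(Splat.getBitWidth() == 64 && Splat.logBase2() == 63);
}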
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:349
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:358
bool hasARMOps() const
Definition: ARMSubtarget.h:302
bool supportsTailCall() const
Definition: ARMSubtarget.h:427
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:335
bool hasVFP4Base() const
Definition: ARMSubtarget.h:310
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:238
bool isThumb1Only() const
Definition: ARMSubtarget.h:403
bool useFPVFMx() const
Definition: ARMSubtarget.h:319
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:311
bool isThumb2() const
Definition: ARMSubtarget.h:404
bool isTargetWindows() const
Definition: ARMSubtarget.h:345
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:325
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:242
bool useSjLjEH() const
Definition: ARMSubtarget.h:324
bool isTargetDarwin() const
Definition: ARMSubtarget.h:337
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:250
bool hasVFP2Base() const
Definition: ARMSubtarget.h:308
bool isTargetAndroid() const
Definition: ARMSubtarget.h:389
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:347
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:363
bool hasVFP3Base() const
Definition: ARMSubtarget.h:309
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:323
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:339
unsigned getPreferBranchLogAlignment() const
Definition: ARMSubtarget.h:514
bool hasMinSize() const
Definition: ARMSubtarget.h:402
bool isTargetIOS() const
Definition: ARMSubtarget.h:338
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:304
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:461
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:340
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:313
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:341
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:435
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:429
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:370
bool isTargetLinux() const
Definition: ARMSubtarget.h:342
bool useFPVFMx16() const
Definition: ARMSubtarget.h:322
bool isMClass() const
Definition: ARMSubtarget.h:405
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:317
bool isTargetELF() const
Definition: ARMSubtarget.h:348
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:471
bool isReadOnly(const GlobalValue *GV) const
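These predicates typically gate lowering decisions. A hedged example follows (the condition is illustrative, not the backend's actual policy, and it assumes the code lives inside the ARM backend where ARMSubtarget.h is visible):

#include "ARMSubtarget.h"

// Illustrative only: prefer a hard-float style lowering when base VFP2 is
// available, the core is not limited to Thumb1, and the ABI is hard-float.
static bool preferHardFloatLoweringSketch(const llvm::ARMSubtarget &ST) {
  return ST.hasVFP2Base() && !ST.isThumb1Only() && ST.isTargetHardFloat();
}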
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two adds is IR...
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
bool isFloatingPointOperation() const
Definition: Instructions.h:882
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:95
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
The address of a basic block.
Definition: Constants.h:893
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
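The usual calling pattern for isConstantSplat in a target combine looks roughly like this sketch (helper name assumed):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Extract the splatted constant bits from a BUILD_VECTOR, if it is a splat.
static bool getSplatBitsSketch(SDValue Op, APInt &SplatBits) {
  auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  if (!BVN)
    return false;
  APInt SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs))
    return false;
  // Callers usually also bound SplatBitSize before consuming the value.
  return SplatBitSize <= 64;
}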
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
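A minimal sketch (not the ARM code itself) of how CCState and CCValAssign are typically driven inside a target's LowerFormalArguments hook; CC_Sketch stands in for a real CCAssignFn such as the ones returned by CCAssignFnForCall:

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static void analyzeFormalArgsSketch(MachineFunction &MF, CallingConv::ID CC,
                                    bool IsVarArg,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    CCAssignFn *CC_Sketch) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, MF.getFunction().getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_Sketch);
  for (CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // The value arrives in the physical register VA.getLocReg().
    } else {
      // The value arrives on the stack at offset VA.getLocMemOffset().
    }
  }
}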
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1342
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1425
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1502
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:709
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
MaybeAlign getStackAlignment() const
Returns the natural stack alignment, or MaybeAlign() if one wasn't specified.
Definition: DataLayout.h:227
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition: DataLayout.cpp:988
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:285
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
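A short example of the DataLayout queries above (the concrete numbers depend on the target's data-layout string; an existing Module M is assumed):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static void dataLayoutQueriesSketch(const Module &M) {
  const DataLayout &DL = M.getDataLayout();
  Type *I64 = Type::getInt64Ty(M.getContext());
  uint64_t AllocBytes = DL.getTypeAllocSize(I64).getFixedValue(); // typically 8
  Align PrefAlign = DL.getPrefTypeAlign(I64);
  bool LittleEndian = DL.isLittleEndian();
  (void)AllocBytes; (void)PrefAlign; (void)LittleEndian;
}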
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:563
unsigned getNumElements() const
Definition: DerivedTypes.h:606
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
arg_iterator arg_begin()
Definition: Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:688
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:108
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2156
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2549
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2141
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1474
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:188
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:500
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1453
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2027
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2527
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2136
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2443
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2013
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1534
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:583
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2172
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2699
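A generic illustration of several Create* helpers above; the pattern resembles reassembling a 64-bit value from two 32-bit halves, as a load-linked lowering might, but it is only a sketch with an assumed insertion point:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Combine two i32 halves into an i64: zext both, shift the high half, OR them.
static Value *combineHalvesSketch(Instruction *InsertPt, Value *Lo, Value *Hi) {
  IRBuilder<> Builder(InsertPt);
  Type *I64 = Builder.getInt64Ty();
  Value *LoExt = Builder.CreateZExt(Lo, I64, "lo64");
  Value *HiExt = Builder.CreateZExt(Hi, I64, "hi64");
  Value *HiShl = Builder.CreateShl(HiExt, Builder.getInt64(32), "hi.shl");
  return Builder.CreateOr(LoExt, HiShl, "merged");
}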
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
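A few concrete uses of the MVT helpers above (the assertions use arbitrary example types):

#include "llvm/CodeGenTypes/MachineValueType.h"
#include <cassert>
using namespace llvm;

static void mvtSketch() {
  MVT V4i32 = MVT::getVectorVT(MVT::i32, 4);
  assert(V4i32.getVectorNumElements() == 4);
  assert(V4i32.getVectorElementType() == MVT::i32);
  assert(V4i32.getFixedSizeInBits() == 128);
  assert(MVT::getIntegerVT(16) == MVT::i16);
  MVT F64 = MVT::f64;
  assert(F64.isFloatingPoint() && !F64.isInteger());
}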
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
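A sketch of the MachineFrameInfo calls above as they commonly appear when a target spills incoming arguments; the sizes and offsets are illustrative:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static void frameObjectsSketch(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // A 16-byte spill slot with 8-byte alignment.
  int SpillFI = MFI.CreateStackObject(16, Align(8), /*isSpillSlot=*/true);
  // A fixed object describing an argument already on the stack at SP+0.
  int ArgFI = MFI.CreateFixedObject(4, /*SPOffset=*/0, /*IsImmutable=*/true);
  (void)SpillFI; (void)ArgFI;
}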
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
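The MachineInstrBuilder helpers above are normally chained off BuildMI (listed further down in this index). A minimal sketch, not code from this file, assuming the ARM opcode enum plus predOps/condCodeOp from ARMBaseInstrInfo.h are visible; opcode, registers, and insertion point are placeholders.
#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
static void emitMovImmSketch(llvm::MachineBasicBlock &MBB,
                             llvm::MachineBasicBlock::iterator InsertPt,
                             const llvm::DebugLoc &DL,
                             const llvm::TargetInstrInfo &TII,
                             llvm::Register DstReg, int64_t Imm) {
  // Build "MOVi DstReg, #Imm", predicated as always-executed, with no
  // optional CPSR definition.
  llvm::BuildMI(MBB, InsertPt, DL, TII.get(llvm::ARM::MOVi), DstReg)
      .addImm(Imm)
      .add(llvm::predOps(llvm::ARMCC::AL))
      .add(llvm::condCodeOp());
}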
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of a block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if the node is an UNDEF node.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:497
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:765
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
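A minimal sketch (not code from this file) of how the SelectionDAG builders above compose inside a custom lowering routine; the abs expansion below is only an illustration of the API, not what this file emits.
#include "llvm/CodeGen/SelectionDAG.h"
static llvm::SDValue lowerAbsSketch(llvm::SDValue Op, llvm::SelectionDAG &DAG) {
  llvm::SDLoc DL(Op);
  llvm::EVT VT = Op.getValueType();
  llvm::SDValue X = Op.getOperand(0);
  // abs(x) == (x < 0) ? 0 - x : x, built from generic ISD nodes.
  llvm::SDValue Zero = DAG.getConstant(0, DL, VT);
  llvm::SDValue Neg = DAG.getNode(llvm::ISD::SUB, DL, VT, Zero, X);
  return DAG.getSelectCC(DL, X, Zero, Neg, X, llvm::ISD::SETLT);
}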
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
void reserve(size_type NumEntries)
Definition: SmallPtrSet.h:112
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallSet.h:168
bool erase(const T &V)
Definition: SmallSet.h:193
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
const unsigned char * bytes_end() const
Definition: StringRef.h:131
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
const unsigned char * bytes_begin() const
Definition: StringRef.h:128
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
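Illustrative only: the usual StringSwitch pattern for mapping constraint-like strings onto an enum. The cases and the SketchConstraint enum are placeholders, not the constraints this file actually handles.
#include "llvm/ADT/StringSwitch.h"
enum class SketchConstraint { Reg, Imm, Memory, Unknown };

static SketchConstraint classifyConstraintSketch(llvm::StringRef C) {
  return llvm::StringSwitch<SketchConstraint>(C)
      .Case("r", SketchConstraint::Reg)     // register operand
      .Case("i", SketchConstraint::Imm)     // immediate operand
      .Case("m", SketchConstraint::Memory)  // memory operand
      .Default(SketchConstraint::Unknown);
}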
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
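A hedged sketch of how the TargetLoweringBase hooks above are typically combined when a target configures its lowering rules. MyTargetLowering is a hypothetical class derived from TargetLowering, and the specific actions chosen below are examples rather than what this file actually sets.
#include "llvm/CodeGen/TargetLowering.h"
void MyTargetLowering::configureSketch(const llvm::TargetRegisterInfo *TRI) {
  // Make i32 legal and carried in ARM general-purpose registers.
  addRegisterClass(llvm::MVT::i32, &llvm::ARM::GPRRegClass);
  // Expand 64-bit signed division during legalization.
  setOperationAction(llvm::ISD::SDIV, llvm::MVT::i64, Expand);
  // Route f32 selects through the target's custom LowerOperation hook.
  setOperationAction(llvm::ISD::SELECT, llvm::MVT::f32, Custom);
  // Once all register classes are registered, derive their properties.
  computeRegisterProperties(TRI);
}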
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:409
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:510
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:650
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:261
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
Type * getArrayElementType() const
Definition: Type.h:411
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Static Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
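Sketch only, using the ARM_AM immediate helpers above: check whether a 32-bit constant is encodable as a single ARM or Thumb-2 modified immediate. The helper name and parameters are illustrative; both encoders return -1 when no encoding exists.
#include "MCTargetDesc/ARMAddressingModes.h"
static bool fitsInOneMovSketch(unsigned Imm, bool IsThumb2) {
  int Enc = IsThumb2 ? llvm::ARM_AM::getT2SOImmVal(Imm)
                     : llvm::ARM_AM::getSOImmVal(Imm);
  return Enc != -1;  // -1 means "not representable as a shifter-operand imm"
}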
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1069
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:512
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1073
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1092
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:157
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1096
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1118
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:788
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1087
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1064
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1433
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:112
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1639
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1555
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1606
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1586
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1557
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
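A small sketch (not from this file) combining two of the ISD condition-code helpers above: commute a comparison's operands, then take the logical inverse of the resulting predicate.
#include "llvm/CodeGen/ISDOpcodes.h"
static llvm::ISD::CondCode swapThenInvertSketch(llvm::ISD::CondCode CC,
                                                llvm::EVT OperandVT) {
  // (X op Y)  ->  (Y op' X)  ->  !(Y op' X)
  llvm::ISD::CondCode Swapped = llvm::ISD::getSetCCSwappedOperands(CC);
  return llvm::ISD::getSetCCInverse(Swapped, OperandVT);
}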
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
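A hedged sketch of the usual RTLIB pattern above: pick the libcall for an FP-to-unsigned-integer conversion and emit it with makeLibCall. The function name is hypothetical and this would normally live inside a TargetLowering member; it is not the exact code in this file.
#include "llvm/CodeGen/TargetLowering.h"
llvm::SDValue lowerFPToUIntViaLibcallSketch(llvm::SDValue Op,
                                            llvm::SelectionDAG &DAG,
                                            const llvm::TargetLowering &TLI) {
  llvm::SDLoc dl(Op);
  llvm::EVT SrcVT = Op.getOperand(0).getValueType();
  llvm::EVT RetVT = Op.getValueType();
  llvm::RTLIB::Libcall LC = llvm::RTLIB::getFPTOUINT(SrcVT, RetVT);
  assert(LC != llvm::RTLIB::UNKNOWN_LIBCALL && "no libcall for this type pair");
  llvm::TargetLowering::MakeLibCallOptions CallOptions;
  llvm::SDValue Ops[] = {Op.getOperand(0)};
  return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, dl).first;
}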
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
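A small self-contained sketch of these range-based wrappers (the vector and the predicates are invented for the example):
  #include "llvm/ADT/STLExtras.h"
  #include <vector>

  static bool allStrictlyPositive(const std::vector<int> &Vals) {
    // llvm::all_of / llvm::any_of take the whole range instead of begin/end.
    return llvm::all_of(Vals, [](int V) { return V > 0; }) &&
           !llvm::any_of(Vals, [](int V) { return V == 0; });
  }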
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
@ Read
Definition: CodeGenData.h:107
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:255
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (32 bit version).
Definition: MathExtras.h:267
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1558
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:279
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
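A hedged sketch of the bit-manipulation helpers referenced in this listing; the constants are illustrative only:
  #include <cassert>
  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"

  static void bitHelperExamples() {
    assert(llvm::isMask_32(0x00FFu));        // contiguous ones from bit 0
    assert(llvm::isShiftedMask_32(0x0FF0u)); // one contiguous run of ones
    assert(llvm::isPowerOf2_32(64u));
    assert(llvm::countr_zero(8u) == 3);      // trailing zeros
    assert(llvm::countl_zero(1u) == 31);     // leading zeros in a 32-bit value
    assert(llvm::Log2_32(32u) == 5);         // floor(log2)
  }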
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
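A sketch of how a CCAssignFn such as CC_ARM_AAPCS is consumed, assuming the usual call-lowering context (CallConv, isVarArg, MF, DAG and Outs come from that context and are not defined here):
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CC_ARM_AAPCS);
  // Each CCValAssign now records whether the argument was assigned to a
  // register or to a stack slot by the calling-convention function.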
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1945
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
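A hedged sketch of the common ARM-backend idiom combining BuildMI, predOps and condCodeOp; the block, insertion point, registers and TII are assumed from the surrounding context:
  // Emit an unconditional, non-flag-setting register move.
  BuildMI(MBB, InsertPt, DL, TII->get(ARM::MOVr), DestReg)
      .addReg(SrcReg)
      .add(predOps(ARMCC::AL))   // predicate: always, with no predicate register
      .add(condCodeOp());        // no 'S' bit, so CPSR is not written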
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
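A sketch combining createSequentialMask with concatenateVectors (the IRBuilder and the two <4 x i32> inputs are assumptions made for the example):
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/IRBuilder.h"

  static llvm::Value *lowHalfOfConcat(llvm::IRBuilderBase &Builder,
                                      llvm::Value *V0, llvm::Value *V1) {
    // Concatenate the two vectors, then extract elements 0..3 with the
    // sequential shuffle mask {0, 1, 2, 3}.
    llvm::Value *Wide = llvm::concatenateVectors(Builder, {V0, V1});
    llvm::SmallVector<int, 16> Mask =
        llvm::createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/0);
    return Builder.CreateShuffleVector(Wide, Mask);
  }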
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
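A minimal DAG-combine-style sketch using these constant predicates; N and its operands are assumed to come from a combine hook, and the folds are only illustrative:
  // Assumed: N is an ISD::AND node being visited by a combine.
  SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
  if (isNullConstant(RHS))
    return RHS;                    // and x, 0 --> 0
  if (isAllOnesConstant(RHS))
    return LHS;                    // and x, -1 --> x
  // isConstOrConstSplat(RHS) would additionally expose the APInt value for
  // both scalar constants and constant splats.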
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:301
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:458
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type that is chosen by the caller.
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
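A hedged sketch of a few of the EVT queries above; the function and the chosen types are invented for illustration:
  #include <cassert>
  #include "llvm/CodeGen/ValueTypes.h"

  static void evtExamples(llvm::LLVMContext &Context) {
    using namespace llvm;
    EVT VT = EVT::getVectorVT(Context, MVT::f32, 4);      // v4f32
    uint64_t Bits = VT.getFixedSizeInBits();              // 128
    EVT IntVT = VT.changeVectorElementTypeToInteger();    // v4i32
    EVT HalfVT = VT.getHalfNumVectorElementsVT(Context);  // v2f32
    (void)Bits; (void)IntVT;
    assert(VT.is128BitVector() && HalfVT.is64BitVector());
  }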
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:293
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute known bits resulting from addition of LHS and RHS.
Definition: KnownBits.h:336
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:804
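A self-contained sketch of the KnownBits operations listed above; the widths and values are only illustrative:
  #include "llvm/Support/KnownBits.h"

  static llvm::KnownBits knownBitsExample() {
    using namespace llvm;
    KnownBits A = KnownBits::makeConstant(APInt(8, 3)); // fully known: 0b00000011
    KnownBits B(8);                                     // nothing known
    KnownBits Sum = KnownBits::add(A, B);               // unknown, since B is unknown
    // Widen the tracked value to 16 bits; the new high bits are known zero.
    return Sum.zext(16);
  }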
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
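A sketch of how these factories are usually threaded into a DAG load; DAG, dl and CPAddr are assumed from a lowering context and are not defined here:
  // Load an i32 from the constant pool, tagging the access for alias analysis.
  SDValue Load = DAG.getLoad(
      MVT::i32, dl, DAG.getEntryNode(), CPAddr,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  // getFixedStack(MF, FI) and getWithOffset(Offset) label frame-index and
  // offset-adjusted accesses in the same way.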
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)